editalign 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +4 -2
- data/examples/align-strings +34 -28
- data/examples/stress-test +23 -16
- data/lib/editalign.rb +30 -26
- data/test/test_editalign.rb +2 -2
- metadata +17 -24
data/README
CHANGED
@@ -43,6 +43,8 @@ arrays. See the EditAlign::DijkstraSearch class for details.
|
|
43
43
|
= History
|
44
44
|
|
45
45
|
* 1-0-0 ... First version
|
46
|
+
* 1-1-0 ... Added EditAlign::VERSION
|
47
|
+
* 1-1-1 ... Improved command-line handling in align-strings and stress-test
|
46
48
|
|
47
49
|
= See Also
|
48
50
|
|
@@ -55,11 +57,11 @@ strategy and optimizations to the one implemented here.
|
|
55
57
|
|
56
58
|
= Copyright
|
57
59
|
|
58
|
-
Copyright 2006, William Patrick McNeill
|
60
|
+
Copyright 2006-2009, William Patrick McNeill
|
59
61
|
|
60
62
|
This program is distributed under the GNU General Public License.
|
61
63
|
|
62
64
|
= Author
|
63
65
|
|
64
|
-
W.P. McNeill mailto:billmcn@
|
66
|
+
W.P. McNeill mailto:billmcn@gmail.com
|
65
67
|
|
data/examples/align-strings
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#!/bin/env ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
#--
|
4
|
-
# Copyright 2006 William Patrick McNeill
|
4
|
+
# Copyright 2006-2009 William Patrick McNeill
|
5
5
|
#
|
6
6
|
# This file is part of Editalign.
|
7
7
|
#
|
@@ -24,49 +24,55 @@
|
|
24
24
|
# Print the character alignment between two strings passed in on the
|
25
25
|
# command line.
|
26
26
|
|
27
|
-
require 'getoptlong'
|
27
|
+
require 'optparse'
|
28
28
|
require 'editalign'
|
29
29
|
|
30
30
|
class ExhaustiveSellersAlignment < EditAlign::SellersAlignment
|
31
31
|
include EditAlign::ExhaustiveSearch
|
32
32
|
end
|
33
33
|
|
34
|
-
# Process command line options.
|
35
|
-
opts = GetoptLong.new(["--match", "-m", GetoptLong::REQUIRED_ARGUMENT],
|
36
|
-
["--nomatch", "-n", GetoptLong::REQUIRED_ARGUMENT],
|
37
|
-
["--insert", "-i",GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
["--delete", "-d", GetoptLong::REQUIRED_ARGUMENT],
|
39
|
-
["--exhaustive", "-e", GetoptLong::NO_ARGUMENT]
|
40
|
-
)
|
41
|
-
|
42
34
|
match = 0
|
43
|
-
|
35
|
+
substitute = 1
|
44
36
|
insert = 1
|
45
37
|
delete = 1
|
46
38
|
exhaustive = false
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
exhaustive = true
|
39
|
+
OptionParser.new do |opts|
|
40
|
+
opts.banner =<<-EOTEXT
|
41
|
+
#{File.basename(__FILE__)} [OPTION] source dest
|
42
|
+
|
43
|
+
Perform an edit distance alignment between the source and dest strings.
|
44
|
+
|
45
|
+
The options may be used to specify match, substitution, insertion, and
|
46
|
+
deletion costs.
|
47
|
+
EOTEXT
|
48
|
+
opts.on("-m", "--match COST", Float, "Match cost (default 0)") do |value|
|
49
|
+
match = value
|
59
50
|
end
|
60
|
-
|
51
|
+
opts.on("-s", "--substitution COST", Float, "Substitution cost (default 1)") do |value|
|
52
|
+
substitute = value
|
53
|
+
end
|
54
|
+
opts.on("-i", "--insertion COST", Float, "Insertion cost (default 1)") do |value|
|
55
|
+
insert = value
|
56
|
+
end
|
57
|
+
opts.on("-d", "--deletion COST", Float, "Deletion cost (default 1)") do |value|
|
58
|
+
delete = value
|
59
|
+
end
|
60
|
+
opts.on("-e", "--exhaustive", "Do exhaustive search for comparison (default false)") do |value|
|
61
|
+
exhaustive = value
|
62
|
+
end
|
63
|
+
end.parse!
|
61
64
|
|
62
|
-
|
63
|
-
|
65
|
+
if not ARGV.length == 2
|
66
|
+
puts "Incorrect number of arguments."
|
67
|
+
exit(0)
|
68
|
+
end
|
69
|
+
source, dest = ARGV
|
64
70
|
|
65
71
|
# Do alignments and print results.
|
66
72
|
alignments = [EditAlign::SellersAlignment]
|
67
73
|
alignments << ExhaustiveSellersAlignment if exhaustive
|
68
74
|
|
69
75
|
alignments.each do |align_class|
|
70
|
-
a = align_class.new(source, dest, match,
|
76
|
+
a = align_class.new(source, dest, match, substitute, insert, delete)
|
71
77
|
puts a, a.to_grid
|
72
78
|
end
|
data/examples/stress-test
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#!/bin/env ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
#--
|
4
|
-
# Copyright 2006 William Patrick McNeill
|
4
|
+
# Copyright 2006-2009 William Patrick McNeill
|
5
5
|
#
|
6
6
|
# This file is part of Editalign.
|
7
7
|
#
|
@@ -21,7 +21,7 @@
|
|
21
21
|
#
|
22
22
|
#++
|
23
23
|
|
24
|
-
require "getoptlong"
|
24
|
+
require "optparse"
|
25
25
|
require "editalign"
|
26
26
|
|
27
27
|
class PureRubyEditAlign < EditAlign::Alignment
|
@@ -51,24 +51,31 @@ end
|
|
51
51
|
# * verbose ... prints strings and edit distances
|
52
52
|
# * pure-ruby ... uses the pure Ruby priority queue instead of the C extension
|
53
53
|
# * int-cost ... uses integer costs
|
54
|
-
def parse_command_line
|
55
|
-
opts = GetoptLong.new(["--verbose", "-v", GetoptLong::NO_ARGUMENT],
|
56
|
-
["--pure-ruby", "-p", GetoptLong::NO_ARGUMENT],
|
57
|
-
["--int-cost", "-i", GetoptLong::NO_ARGUMENT])
|
54
|
+
def parse_command_line
|
58
55
|
verbose = false
|
59
56
|
pure_ruby = false
|
60
57
|
int_cost = false
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
int_cost =
|
58
|
+
OptionParser.new do |opts|
|
59
|
+
opts.banner =<<-EOTEXT
|
60
|
+
#{File.basename(__FILE__)} [OPTION] trials length edits
|
61
|
+
|
62
|
+
Perform a stress test on the alignment code.
|
63
|
+
EOTEXT
|
64
|
+
opts.on("-i", "--int-cost", "Use integer costs") do |value|
|
65
|
+
int_cost = value
|
69
66
|
end
|
67
|
+
opts.on("-p", "--pure-ruby", "Use the pure Ruby priority queue instead of the C extension") do |value|
|
68
|
+
pure_ruby = value
|
69
|
+
end
|
70
|
+
opts.on("-v", "--verbose", "Print strings and edit distances") do |value|
|
71
|
+
verbose = value
|
72
|
+
end
|
73
|
+
end.parse!
|
74
|
+
|
75
|
+
if not ARGV.length == 3
|
76
|
+
puts "Incorrect number of arguments."
|
77
|
+
exit(0)
|
70
78
|
end
|
71
|
-
|
72
79
|
trials = Integer(ARGV[0])
|
73
80
|
length = Integer(ARGV[1])
|
74
81
|
edits = Integer(ARGV[2])
|
data/lib/editalign.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2006 William Patrick McNeill
|
1
|
+
# Copyright 2006-2009 William Patrick McNeill
|
2
2
|
#
|
3
3
|
# Editalign is free software; you can redistribute it and/or modify it
|
4
4
|
# under the terms of the GNU General Public License as published by
|
@@ -18,6 +18,8 @@
|
|
18
18
|
# functions.
|
19
19
|
module EditAlign
|
20
20
|
|
21
|
+
VERSION = "1.1.1"
|
22
|
+
|
21
23
|
# This module employs Dijkstra's algorithm to find the lowest-cost
|
22
24
|
# sequence of edit operations that will transform the source array
|
23
25
|
# into the destination array. The alignment grid is treated as a
|
@@ -86,8 +88,9 @@ module EditAlign
|
|
86
88
|
next unless next_cost < @cost[next_cell]
|
87
89
|
@cost[next_cell] = next_cost
|
88
90
|
@backtrace[next_cell] = cell
|
89
|
-
agenda[next_cell] = priority_factory(next_cost, next_cell) \
|
90
91
|
unless next_cost >= @cost[@end]
|
92
|
+
agenda[next_cell] = priority_factory(next_cost, next_cell)
|
93
|
+
end
|
91
94
|
end
|
92
95
|
end
|
93
96
|
end
|
@@ -207,32 +210,31 @@ module EditAlign
|
|
207
210
|
|
208
211
|
|
209
212
|
# The Alignment class is given a source and destination array at
|
210
|
-
# construction time. It does a dynamic programming alignment
|
211
|
-
#
|
212
|
-
#
|
213
|
+
# construction time. It does a dynamic programming alignment between them
|
214
|
+
# and makes the results of that alignment available through instance
|
215
|
+
# methods.
|
213
216
|
#
|
214
|
-
# If there are multiple alignments with equal edit distances
|
215
|
-
#
|
217
|
+
# If there are multiple alignments with equal edit distances Alignment will
|
218
|
+
# find one of them. Which one is undefined.
|
216
219
|
#
|
217
|
-
# Alignment works by constructing a matrix with dimensions equal to
|
218
|
-
#
|
219
|
-
#
|
220
|
-
#
|
221
|
-
#
|
222
|
-
#
|
223
|
-
#
|
220
|
+
# Alignment works by constructing a matrix with dimensions equal to the
|
221
|
+
# length of the source and destination arrays. Moving horizontally and
|
222
|
+
# vertically in the matrix represents insertion and deletion operations,
|
223
|
+
# respectively, while moving diagonally represents substitution. Each cell
|
224
|
+
# of the matrix contains the minimum cost it takes to reach that cell. The
|
225
|
+
# algorithm fills in cells in the matrix until it reaches the furthest
|
226
|
+
# corner.
|
224
227
|
#
|
225
|
-
# The search is done using Dijkstra's algorithm as implemented in
|
226
|
-
#
|
227
|
-
#
|
228
|
-
# #find_lowest_cost_alignment function.
|
228
|
+
# The search is done using Dijkstra's algorithm as implemented in the
|
229
|
+
# DijkstraSearch. A different search algorithm may be specified by
|
230
|
+
# including a mixin that redefines the #find_lowest_cost_alignment function.
|
229
231
|
#
|
230
|
-
# This class uses Levenshtein weighting scheme. Levenshtein assigns
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
232
|
+
# This class uses the Levenshtein weighting scheme. Levenshtein assigns a
|
233
|
+
# cost of 1 to insertions and deletions. It assigns a cost of 1 to
|
234
|
+
# substitutions when the items are different and 0 when they are the same.
|
235
|
+
# Different weighting schemes may be specified by overloading the #insert,
|
236
|
+
# #delete, and #substitute functions. The costs must be non-negative
|
237
|
+
# numbers.
|
236
238
|
class Alignment
|
237
239
|
include DijkstraSearch
|
238
240
|
|
@@ -245,6 +247,8 @@ module EditAlign
|
|
245
247
|
# Optionally either <em>source</em> or <em>dest</em> may be
|
246
248
|
# strings, in which they will be treated as arrays of characters.
|
247
249
|
def initialize(source, dest)
|
250
|
+
@path = nil
|
251
|
+
|
248
252
|
# Convert strings into arrays.
|
249
253
|
source = source.unpack('U*').collect {|c| c.chr} if source.class == String
|
250
254
|
dest = dest.unpack('U*').collect {|c| c.chr} if dest.class == String
|
@@ -314,7 +318,7 @@ module EditAlign
|
|
314
318
|
def edit_sequence # :yields: cell, {:substitute, :insert, :delete}
|
315
319
|
# The first time this function is called, walk backwards through
|
316
320
|
# the backtrace to create the @path instance variable.
|
317
|
-
if
|
321
|
+
if @path.nil?
|
318
322
|
@path = [@end]
|
319
323
|
while cell = @backtrace[@path[0]]
|
320
324
|
@path.unshift(cell)
|
@@ -400,7 +404,7 @@ module EditAlign
|
|
400
404
|
# => true
|
401
405
|
# irb(main):002:0> a = EditAlign::Alignment.new('captained', 'caspian')
|
402
406
|
# => <Alignment: 5>
|
403
|
-
# irb(main):003:0> puts a
|
407
|
+
# irb(main):003:0> puts a.to_grid
|
404
408
|
# - c a p t a i n e d
|
405
409
|
# - [0.00] 1.00 2.00 3.00 4.00 5.00 * * * *
|
406
410
|
# c 1.00 [0.00] 1.00 2.00 3.00 4.00 5.00 * * *
|
data/test/test_editalign.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
!ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: editalign
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.0
|
7
|
-
date:
|
8
|
-
summary:
|
6
|
+
version: 1.1.1
|
7
|
+
date: 2009-04-22 00:00:00 -07:00
|
8
|
+
summary: EditAlign calculates dynamic programming alignments between arrays
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
|
-
email: billmcn@
|
12
|
-
homepage: http://
|
13
|
-
rubyforge_project:
|
11
|
+
email: billmcn@gmail.com
|
12
|
+
homepage: http://editalign.rubyforge.org/
|
13
|
+
rubyforge_project: editalign
|
14
14
|
description: This module performs edit alignments between arrays. It returns alignments and edit distances.
|
15
15
|
autorequire:
|
16
16
|
default_executable:
|
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
25
25
|
platform: ruby
|
26
26
|
signing_key:
|
27
27
|
cert_chain:
|
28
|
+
post_install_message:
|
28
29
|
authors:
|
29
30
|
- W.P. McNeill
|
30
31
|
files:
|
@@ -36,12 +37,12 @@ files:
|
|
36
37
|
test_files:
|
37
38
|
- test/test_editalign.rb
|
38
39
|
rdoc_options:
|
39
|
-
- --title
|
40
|
-
- EditAlign --
|
41
|
-
- --main
|
42
|
-
- README
|
43
|
-
- --line-numbers
|
44
|
-
- --inline-source
|
40
|
+
- - --title
|
41
|
+
- EditAlign -- Edit Alignment
|
42
|
+
- --main
|
43
|
+
- README
|
44
|
+
- --line-numbers
|
45
|
+
- --inline-source
|
45
46
|
extra_rdoc_files:
|
46
47
|
- README
|
47
48
|
executables: []
|
@@ -50,13 +51,5 @@ extensions: []
|
|
50
51
|
|
51
52
|
requirements: []
|
52
53
|
|
53
|
-
dependencies:
|
54
|
-
|
55
|
-
name: PriorityQueue
|
56
|
-
version_requirement:
|
57
|
-
version_requirements: !ruby/object:Gem::Version::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 0.0.0
|
62
|
-
version:
|
54
|
+
dependencies: []
|
55
|
+
|