editalign 1.0.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -43,6 +43,8 @@ arrays. See the EditAlign::DijkstraSearch class for details.
43
43
  = History
44
44
 
45
45
  * 1-0-0 ... First version
46
+ * 1-1-0 ... Added EditAlign::VERSION
47
+ * 1-1-1 ... Improved command-line handling in align-strings and stress-test
46
48
 
47
49
  = See Also
48
50
 
@@ -55,11 +57,11 @@ strategy and optimizations to the one implemented here.
55
57
 
56
58
  = Copyright
57
59
 
58
- Copyright 2006, William Patrick McNeill
60
+ Copyright 2006-2009, William Patrick McNeill
59
61
 
60
62
  This program is distributed under the GNU General Public License.
61
63
 
62
64
  = Author
63
65
 
64
- W.P. McNeill mailto:billmcn@u.washington.edu
66
+ W.P. McNeill mailto:billmcn@gmail.com
65
67
 
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
@@ -24,49 +24,55 @@
24
24
  # Print the character alignment between two strings passed in on the
25
25
  # command line.
26
26
 
27
- require 'getoptlong'
27
+ require 'optparse'
28
28
  require 'editalign'
29
29
 
30
30
  class ExhaustiveSellersAlignment < EditAlign::SellersAlignment
31
31
  include EditAlign::ExhaustiveSearch
32
32
  end
33
33
 
34
- # Process command line options.
35
- opts = GetoptLong.new(["--match", "-m", GetoptLong::REQUIRED_ARGUMENT],
36
- ["--nomatch", "-n", GetoptLong::REQUIRED_ARGUMENT],
37
- ["--insert", "-i",GetoptLong::REQUIRED_ARGUMENT],
38
- ["--delete", "-d", GetoptLong::REQUIRED_ARGUMENT],
39
- ["--exhaustive", "-e", GetoptLong::NO_ARGUMENT]
40
- )
41
-
42
34
  match = 0
43
- mismatch = 1
35
+ substitute = 1
44
36
  insert = 1
45
37
  delete = 1
46
38
  exhaustive = false
47
- opts.each do |opt, arg|
48
- case opt
49
- when "--match"
50
- match = arg.to_f
51
- when "--nomatch"
52
- mismatch = arg.to_f
53
- when "--insert"
54
- insert = arg.to_f
55
- when "--delete"
56
- delete = arg.to_f
57
- when "--exhaustive"
58
- exhaustive = true
39
+ OptionParser.new do |opts|
40
+ opts.banner =<<-EOTEXT
41
+ #{File.basename(__FILE__)} [OPTION] source dest
42
+
43
+ Perform an edit distance alignment between the source and dest strings.
44
+
45
+ The options may be used to specify match, substitution, insertion, and
46
+ deletion costs.
47
+ EOTEXT
48
+ opts.on("-m", "--match COST", Float, "Match cost (default 0)") do |value|
49
+ match = value
59
50
  end
60
- end
51
+ opts.on("-s", "--substitution COST", Float, "Substitution cost (default 1)") do |value|
52
+ substitute = value
53
+ end
54
+ opts.on("-i", "--insertion COST", Float, "Insertion cost (default 1)") do |value|
55
+ insert = value
56
+ end
57
+ opts.on("-d", "--deletion COST", Float, "Deletion cost (default 1)") do |value|
58
+ delete = value
59
+ end
60
+ opts.on("-e", "--exhaustive", "Do exhaustive search for comparison (default false)") do |value|
61
+ exhaustive = value
62
+ end
63
+ end.parse!
61
64
 
62
- source = ARGV[0]
63
- dest = ARGV[1]
65
+ if not ARGV.length == 2
66
+ puts "Incorrect number of arguments."
67
+ exit(0)
68
+ end
69
+ source, dest = ARGV
64
70
 
65
71
  # Do alignments and print results.
66
72
  alignments = [EditAlign::SellersAlignment]
67
73
  alignments << ExhaustiveSellersAlignment if exhaustive
68
74
 
69
75
  alignments.each do |align_class|
70
- a = align_class.new(source, dest, match, mismatch, insert, delete)
76
+ a = align_class.new(source, dest, match, substitute, insert, delete)
71
77
  puts a, a.to_grid
72
78
  end
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
@@ -21,7 +21,7 @@
21
21
  #
22
22
  #++
23
23
 
24
- require "getoptlong"
24
+ require "optparse"
25
25
  require "editalign"
26
26
 
27
27
  class PureRubyEditAlign < EditAlign::Alignment
@@ -51,24 +51,31 @@ end
51
51
  # * verbose ... prints strings and edit distances
52
52
  # * pure-ruby ... uses the pure Ruby priority queue instead of the C extension
53
53
  # * int-cost ... uses integer costs
54
- def parse_command_line
55
- opts = GetoptLong.new(["--verbose", "-v", GetoptLong::NO_ARGUMENT],
56
- ["--pure-ruby", "-p", GetoptLong::NO_ARGUMENT],
57
- ["--int-cost", "-i", GetoptLong::NO_ARGUMENT])
54
+ def parse_command_line
58
55
  verbose = false
59
56
  pure_ruby = false
60
57
  int_cost = false
61
- opts.each do |opt, arg|
62
- case opt
63
- when "--verbose"
64
- verbose = true
65
- when "--pure-ruby"
66
- pure_ruby = true
67
- when "--int-cost"
68
- int_cost = true
58
+ OptionParser.new do |opts|
59
+ opts.banner =<<-EOTEXT
60
+ #{File.basename(__FILE__)} [OPTION] trials length edits
61
+
62
+ Perform a stress test on the alignment code.
63
+ EOTEXT
64
+ opts.on("-i", "--int-cost", "Use integer costs") do |value|
65
+ int_cost = value
69
66
  end
67
+ opts.on("-p", "--pure-ruby", "Use the pure Ruby priority queue instead of the C extension") do |value|
68
+ pure_ruby = value
69
+ end
70
+ opts.on("-v", "--verbose", "Print strings and edit distances") do |value|
71
+ verbose = value
72
+ end
73
+ end.parse!
74
+
75
+ if not ARGV.length == 3
76
+ puts "Incorrect number of arguments."
77
+ exit(0)
70
78
  end
71
-
72
79
  trials = Integer(ARGV[0])
73
80
  length = Integer(ARGV[1])
74
81
  edits = Integer(ARGV[2])
@@ -1,4 +1,4 @@
1
- # Copyright 2006 William Patrick McNeill
1
+ # Copyright 2006-2009 William Patrick McNeill
2
2
  #
3
3
  # Editalign is free software; you can redistribute it and/or modify it
4
4
  # under the terms of the GNU General Public License as published by
@@ -18,6 +18,8 @@
18
18
  # functions.
19
19
  module EditAlign
20
20
 
21
+ VERSION = "1.1.1"
22
+
21
23
  # This module employs Dijkstra's algorithm to find the lowest-cost
22
24
  # sequence of edit operations that will transform the source array
23
25
  # into the destination array. The alignment grid is treated as a
@@ -86,8 +88,9 @@ module EditAlign
86
88
  next unless next_cost < @cost[next_cell]
87
89
  @cost[next_cell] = next_cost
88
90
  @backtrace[next_cell] = cell
89
- agenda[next_cell] = priority_factory(next_cost, next_cell) \
90
91
  unless next_cost >= @cost[@end]
92
+ agenda[next_cell] = priority_factory(next_cost, next_cell)
93
+ end
91
94
  end
92
95
  end
93
96
  end
@@ -207,32 +210,31 @@ module EditAlign
207
210
 
208
211
 
209
212
  # The Alignment class is given a source and destination array at
210
- # construction time. It does a dynamic programming alignment
211
- # between them and makes the results of that alignment available
212
- # through instance methods.
213
+ # construction time. It does a dynamic programming alignment between them
214
+ # and makes the results of that alignment available through instance
215
+ # methods.
213
216
  #
214
- # If there are multiple alignments with equal edit distances
215
- # Alignment will find one of them. Which one is undefined.
217
+ # If there are multiple alignments with equal edit distances Alignment will
218
+ # find one of them. Which one is undefined.
216
219
  #
217
- # Alignment works by constructing a matrix with dimensions equal to
218
- # the length of the source and destination arrays. Moving
219
- # horizontally and vertically in the matrix represents insertion and
220
- # deletion operations, respectively, while moving diagonally
221
- # represents substitution. Each cell of the matrix contains the
222
- # minimum cost it takes to reach that cell. The algorithm fills in
223
- # cells in the matrix until it reaches the furthest corner.
220
+ # Alignment works by constructing a matrix with dimensions equal to the
221
+ # length of the source and destination arrays. Moving horizontally and
222
+ # vertically in the matrix represents insertion and deletion operations,
223
+ # respectively, while moving diagonally represents substitution. Each cell
224
+ # of the matrix contains the minimum cost it takes to reach that cell. The
225
+ # algorithm fills in cells in the matrix until it reaches the furthest
226
+ # corner.
224
227
  #
225
- # The search is done using Dijkstra's algorithm as implemented in
226
- # the DijkstraSearch. A different search algorithm may be specified
227
- # by including a mixin that redefines the
228
- # #find_lowest_cost_alignment function.
228
+ # The search is done using Dijkstra's algorithm as implemented in the
229
+ # DijkstraSearch. A different search algorithm may be specified by
230
+ # including a mixin that redefines the #find_lowest_cost_alignment function.
229
231
  #
230
- # This class uses Levenshtein weighting scheme. Levenshtein assigns
231
- # a cost of 1 to insertions and deletions. It assigns a cost of 1
232
- # to substitutions when the items are different and 0 when they are
233
- # the same. Different weighting schemes may be specified by
234
- # overloading the #insert, #delete, and #substitute functions. The
235
- # costs must be non-negative numbers.
232
+ # This class uses the Levenshtein weighting scheme. Levenshtein assigns a
233
+ # cost of 1 to insertions and deletions. It assigns a cost of 1 to
234
+ # substitutions when the items are different and 0 when they are the same.
235
+ # Different weighting schemes may be specified by overloading the #insert,
236
+ # #delete, and #substitute functions. The costs must be non-negative
237
+ # numbers.
236
238
  class Alignment
237
239
  include DijkstraSearch
238
240
 
@@ -245,6 +247,8 @@ module EditAlign
245
247
  # Optionally either <em>source</em> or <em>dest</em> may be
246
248
  # strings, in which case they will be treated as arrays of characters.
247
249
  def initialize(source, dest)
250
+ @path = nil
251
+
248
252
  # Convert strings into arrays.
249
253
  source = source.unpack('U*').collect {|c| c.chr} if source.class == String
250
254
  dest = dest.unpack('U*').collect {|c| c.chr} if dest.class == String
@@ -314,7 +318,7 @@ module EditAlign
314
318
  def edit_sequence # :yields: cell, {:substitute, :insert, :delete}
315
319
  # The first time this function is called, walk backwards through
316
320
  # the backtrace to create the @path instance variable.
317
- if not @path
321
+ if @path.nil?
318
322
  @path = [@end]
319
323
  while cell = @backtrace[@path[0]]
320
324
  @path.unshift(cell)
@@ -400,7 +404,7 @@ module EditAlign
400
404
  # => true
401
405
  # irb(main):002:0> a = EditAlign::Alignment.new('captained', 'caspian')
402
406
  # => <Alignment: 5>
403
- # irb(main):003:0> puts a
407
+ # irb(main):003:0> puts a.to_grid
404
408
  # - c a p t a i n e d
405
409
  # - [0.00] 1.00 2.00 3.00 4.00 5.00 * * * *
406
410
  # c 1.00 [0.00] 1.00 2.00 3.00 4.00 5.00 * * *
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
metadata CHANGED
@@ -1,16 +1,16 @@
1
- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: editalign
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.0.0
7
- date: 2006-05-30 00:00:00 -07:00
8
- summary: Edit alignments between arrays
6
+ version: 1.1.1
7
+ date: 2009-04-22 00:00:00 -07:00
8
+ summary: EditAlign calculates dynamic programming alignments between arrays
9
9
  require_paths:
10
10
  - lib
11
- email: billmcn@u.washington.edu
12
- homepage: http://staff.washington.edu/billmcn/index.shtml
13
- rubyforge_project:
11
+ email: billmcn@gmail.com
12
+ homepage: http://editalign.rubyforge.org/
13
+ rubyforge_project: editalign
14
14
  description: This module performs edit alignments between arrays. It returns alignments and edit distances.
15
15
  autorequire:
16
16
  default_executable:
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - W.P. McNeill
30
31
  files:
@@ -36,12 +37,12 @@ files:
36
37
  test_files:
37
38
  - test/test_editalign.rb
38
39
  rdoc_options:
39
- - --title
40
- - EditAlign -- Ruby Edit Alignment
41
- - --main
42
- - README
43
- - --line-numbers
44
- - --inline-source
40
+ - - --title
41
+ - EditAlign -- Edit Alignment
42
+ - --main
43
+ - README
44
+ - --line-numbers
45
+ - --inline-source
45
46
  extra_rdoc_files:
46
47
  - README
47
48
  executables: []
@@ -50,13 +51,5 @@ extensions: []
50
51
 
51
52
  requirements: []
52
53
 
53
- dependencies:
54
- - !ruby/object:Gem::Dependency
55
- name: PriorityQueue
56
- version_requirement:
57
- version_requirements: !ruby/object:Gem::Version::Requirement
58
- requirements:
59
- - - ">"
60
- - !ruby/object:Gem::Version
61
- version: 0.0.0
62
- version:
54
+ dependencies: []
55
+