editalign 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -43,6 +43,8 @@ arrays. See the EditAlign::DijkstraSearch class for details.
43
43
  = History
44
44
 
45
45
  * 1-0-0 ... First version
46
+ * 1-1-0 ... Added EditAlign::VERSION
47
+ * 1-1-1 ... Improved command-line handling in align-strings and stress-test
46
48
 
47
49
  = See Also
48
50
 
@@ -55,11 +57,11 @@ strategy and optimizations to the one implemented here.
55
57
 
56
58
  = Copyright
57
59
 
58
- Copyright 2006, William Patrick McNeill
60
+ Copyright 2006-2009, William Patrick McNeill
59
61
 
60
62
  This program is distributed under the GNU General Public License.
61
63
 
62
64
  = Author
63
65
 
64
- W.P. McNeill mailto:billmcn@u.washington.edu
66
+ W.P. McNeill mailto:billmcn@gmail.com
65
67
 
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
@@ -24,49 +24,55 @@
24
24
  # Print the character alignment between two strings passed in on the
25
25
  # command line.
26
26
 
27
- require 'getoptlong'
27
+ require 'optparse'
28
28
  require 'editalign'
29
29
 
30
30
  class ExhaustiveSellersAlignment < EditAlign::SellersAlignment
31
31
  include EditAlign::ExhaustiveSearch
32
32
  end
33
33
 
34
- # Process command line options.
35
- opts = GetoptLong.new(["--match", "-m", GetoptLong::REQUIRED_ARGUMENT],
36
- ["--nomatch", "-n", GetoptLong::REQUIRED_ARGUMENT],
37
- ["--insert", "-i",GetoptLong::REQUIRED_ARGUMENT],
38
- ["--delete", "-d", GetoptLong::REQUIRED_ARGUMENT],
39
- ["--exhaustive", "-e", GetoptLong::NO_ARGUMENT]
40
- )
41
-
42
34
  match = 0
43
- mismatch = 1
35
+ substitute = 1
44
36
  insert = 1
45
37
  delete = 1
46
38
  exhaustive = false
47
- opts.each do |opt, arg|
48
- case opt
49
- when "--match"
50
- match = arg.to_f
51
- when "--nomatch"
52
- mismatch = arg.to_f
53
- when "--insert"
54
- insert = arg.to_f
55
- when "--delete"
56
- delete = arg.to_f
57
- when "--exhaustive"
58
- exhaustive = true
39
+ OptionParser.new do |opts|
40
+ opts.banner =<<-EOTEXT
41
+ #{File.basename(__FILE__)} [OPTION] source dest
42
+
43
+ Perform an edit distance alignment between the source and dest strings.
44
+
45
+ The options may be used to specify match, substitution, insertion, and
46
+ deletion costs.
47
+ EOTEXT
48
+ opts.on("-m", "--match COST", Float, "Match cost (default 0)") do |value|
49
+ match = value
59
50
  end
60
- end
51
+ opts.on("-s", "--substitution COST", Float, "Substitution cost (default 1)") do |value|
52
+ substitute = value
53
+ end
54
+ opts.on("-i", "--insertion COST", Float, "Insertion cost (default 1)") do |value|
55
+ insert = value
56
+ end
57
+ opts.on("-d", "--deletion COST", Float, "Deletion cost (default 1)") do |value|
58
+ delete = value
59
+ end
60
+ opts.on("-e", "--exhaustive", "Do exhaustive search for comparison (default false)") do |value|
61
+ exhaustive = value
62
+ end
63
+ end.parse!
61
64
 
62
- source = ARGV[0]
63
- dest = ARGV[1]
65
+ if not ARGV.length == 2
66
+ puts "Incorrect number of arguments."
67
+ exit(0)
68
+ end
69
+ source, dest = ARGV
64
70
 
65
71
  # Do alignments and print results.
66
72
  alignments = [EditAlign::SellersAlignment]
67
73
  alignments << ExhaustiveSellersAlignment if exhaustive
68
74
 
69
75
  alignments.each do |align_class|
70
- a = align_class.new(source, dest, match, mismatch, insert, delete)
76
+ a = align_class.new(source, dest, match, substitute, insert, delete)
71
77
  puts a, a.to_grid
72
78
  end
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
@@ -21,7 +21,7 @@
21
21
  #
22
22
  #++
23
23
 
24
- require "getoptlong"
24
+ require "optparse"
25
25
  require "editalign"
26
26
 
27
27
  class PureRubyEditAlign < EditAlign::Alignment
@@ -51,24 +51,31 @@ end
51
51
  # * verbose ... prints strings and edit distances
52
52
  # * pure-ruby ... uses the pure Ruby priority queue instead of the C extension
53
53
  # * int-cost ... uses integer costs
54
- def parse_command_line
55
- opts = GetoptLong.new(["--verbose", "-v", GetoptLong::NO_ARGUMENT],
56
- ["--pure-ruby", "-p", GetoptLong::NO_ARGUMENT],
57
- ["--int-cost", "-i", GetoptLong::NO_ARGUMENT])
54
+ def parse_command_line
58
55
  verbose = false
59
56
  pure_ruby = false
60
57
  int_cost = false
61
- opts.each do |opt, arg|
62
- case opt
63
- when "--verbose"
64
- verbose = true
65
- when "--pure-ruby"
66
- pure_ruby = true
67
- when "--int-cost"
68
- int_cost = true
58
+ OptionParser.new do |opts|
59
+ opts.banner =<<-EOTEXT
60
+ #{File.basename(__FILE__)} [OPTION] trials length edits
61
+
62
+ Perform a stress test on the alignment code.
63
+ EOTEXT
64
+ opts.on("-i", "--int-cost", "Use integer costs") do |value|
65
+ int_cost = value
69
66
  end
67
+ opts.on("-p", "--pure-ruby", "Use the pure Ruby priority queue instead of the C extension") do |value|
68
+ pure_ruby = value
69
+ end
70
+ opts.on("-v", "--verbose", "Print strings and edit distances") do |value|
71
+ verbose = value
72
+ end
73
+ end.parse!
74
+
75
+ if not ARGV.length == 3
76
+ puts "Incorrect number of arguments."
77
+ exit(0)
70
78
  end
71
-
72
79
  trials = Integer(ARGV[0])
73
80
  length = Integer(ARGV[1])
74
81
  edits = Integer(ARGV[2])
@@ -1,4 +1,4 @@
1
- # Copyright 2006 William Patrick McNeill
1
+ # Copyright 2006-2009 William Patrick McNeill
2
2
  #
3
3
  # Editalign is free software; you can redistribute it and/or modify it
4
4
  # under the terms of the GNU General Public License as published by
@@ -18,6 +18,8 @@
18
18
  # functions.
19
19
  module EditAlign
20
20
 
21
+ VERSION = "1.1.1"
22
+
21
23
  # This module employs Dijkstra's algorithm to find the lowest-cost
22
24
  # sequence of edit operations that will transform the source array
23
25
  # into the destination array. The alignment grid is treated as a
@@ -86,8 +88,9 @@ module EditAlign
86
88
  next unless next_cost < @cost[next_cell]
87
89
  @cost[next_cell] = next_cost
88
90
  @backtrace[next_cell] = cell
89
- agenda[next_cell] = priority_factory(next_cost, next_cell) \
90
91
  unless next_cost >= @cost[@end]
92
+ agenda[next_cell] = priority_factory(next_cost, next_cell)
93
+ end
91
94
  end
92
95
  end
93
96
  end
@@ -207,32 +210,31 @@ module EditAlign
207
210
 
208
211
 
209
212
  # The Alignment class is given a source and destination array at
210
- # construction time. It does a dynamic programming alignment
211
- # between them and makes the results of that alignment available
212
- # through instance methods.
213
+ # construction time. It does a dynamic programming alignment between them
214
+ # and makes the results of that alignment available through instance
215
+ # methods.
213
216
  #
214
- # If there are multiple alignments with equal edit distances
215
- # Alignment will find one of them. Which one is undefined.
217
+ # If there are multiple alignments with equal edit distances Alignment will
218
+ # find one of them. Which one is undefined.
216
219
  #
217
- # Alignment works by constructing a matrix with dimensions equal to
218
- # the length of the source and destination arrays. Moving
219
- # horizontally and vertically in the matrix represents insertion and
220
- # deletion operations, respectively, while moving diagonally
221
- # represents substitution. Each cell of the matrix contains the
222
- # minimum cost it takes to reach that cell. The algorithm fills in
223
- # cells in the matrix until it reaches the furthest corner.
220
+ # Alignment works by constructing a matrix with dimensions equal to the
221
+ # length of the source and destination arrays. Moving horizontally and
222
+ # vertically in the matrix represents insertion and deletion operations,
223
+ # respectively, while moving diagonally represents substitution. Each cell
224
+ # of the matrix contains the minimum cost it takes to reach that cell. The
225
+ # algorithm fills in cells in the matrix until it reaches the furthest
226
+ # corner.
224
227
  #
225
- # The search is done using Dijkstra's algorithm as implemented in
226
- # the DijkstraSearch. A different search algorithm may be specified
227
- # by including a mixin that redefines the
228
- # #find_lowest_cost_alignment function.
228
+ # The search is done using Dijkstra's algorithm as implemented in the
229
+ # DijkstraSearch module. A different search algorithm may be specified by
230
+ # including a mixin that redefines the #find_lowest_cost_alignment function.
229
231
  #
230
- # This class uses Levenshtein weighting scheme. Levenshtein assigns
231
- # a cost of 1 to insertions and deletions. It assigns a cost of 1
232
- # to substitutions when the items are different and 0 when they are
233
- # the same. Different weighting schemes may be specified by
234
- # overloading the #insert, #delete, and #substitute functions. The
235
- # costs must be non-negative numbers.
232
+ # This class uses the Levenshtein weighting scheme. Levenshtein assigns a
233
+ # cost of 1 to insertions and deletions. It assigns a cost of 1 to
234
+ # substitutions when the items are different and 0 when they are the same.
235
+ # Different weighting schemes may be specified by overloading the #insert,
236
+ # #delete, and #substitute functions. The costs must be non-negative
237
+ # numbers.
236
238
  class Alignment
237
239
  include DijkstraSearch
238
240
 
@@ -245,6 +247,8 @@ module EditAlign
245
247
  # Optionally either <em>source</em> or <em>dest</em> may be
246
248
  # strings, in which case they will be treated as arrays of characters.
247
249
  def initialize(source, dest)
250
+ @path = nil
251
+
248
252
  # Convert strings into arrays.
249
253
  source = source.unpack('U*').collect {|c| c.chr} if source.class == String
250
254
  dest = dest.unpack('U*').collect {|c| c.chr} if dest.class == String
@@ -314,7 +318,7 @@ module EditAlign
314
318
  def edit_sequence # :yields: cell, {:substitute, :insert, :delete}
315
319
  # The first time this function is called, walk backwards through
316
320
  # the backtrace to create the @path instance variable.
317
- if not @path
321
+ if @path.nil?
318
322
  @path = [@end]
319
323
  while cell = @backtrace[@path[0]]
320
324
  @path.unshift(cell)
@@ -400,7 +404,7 @@ module EditAlign
400
404
  # => true
401
405
  # irb(main):002:0> a = EditAlign::Alignment.new('captained', 'caspian')
402
406
  # => <Alignment: 5>
403
- # irb(main):003:0> puts a
407
+ # irb(main):003:0> puts a.to_grid
404
408
  # - c a p t a i n e d
405
409
  # - [0.00] 1.00 2.00 3.00 4.00 5.00 * * * *
406
410
  # c 1.00 [0.00] 1.00 2.00 3.00 4.00 5.00 * * *
@@ -1,7 +1,7 @@
1
- #!/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  #--
4
- # Copyright 2006 William Patrick McNeill
4
+ # Copyright 2006-2009 William Patrick McNeill
5
5
  #
6
6
  # This file is part of Editalign.
7
7
  #
metadata CHANGED
@@ -1,16 +1,16 @@
1
- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: editalign
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.0.0
7
- date: 2006-05-30 00:00:00 -07:00
8
- summary: Edit alignments between arrays
6
+ version: 1.1.1
7
+ date: 2009-04-22 00:00:00 -07:00
8
+ summary: EditAlign calculates dynamic programming alignments between arrays
9
9
  require_paths:
10
10
  - lib
11
- email: billmcn@u.washington.edu
12
- homepage: http://staff.washington.edu/billmcn/index.shtml
13
- rubyforge_project:
11
+ email: billmcn@gmail.com
12
+ homepage: http://editalign.rubyforge.org/
13
+ rubyforge_project: editalign
14
14
  description: This module performs edit alignments between arrays. It returns alignments and edit distances.
15
15
  autorequire:
16
16
  default_executable:
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - W.P. McNeill
30
31
  files:
@@ -36,12 +37,12 @@ files:
36
37
  test_files:
37
38
  - test/test_editalign.rb
38
39
  rdoc_options:
39
- - --title
40
- - EditAlign -- Ruby Edit Alignment
41
- - --main
42
- - README
43
- - --line-numbers
44
- - --inline-source
40
+ - - --title
41
+ - EditAlign -- Edit Alignment
42
+ - --main
43
+ - README
44
+ - --line-numbers
45
+ - --inline-source
45
46
  extra_rdoc_files:
46
47
  - README
47
48
  executables: []
@@ -50,13 +51,5 @@ extensions: []
50
51
 
51
52
  requirements: []
52
53
 
53
- dependencies:
54
- - !ruby/object:Gem::Dependency
55
- name: PriorityQueue
56
- version_requirement:
57
- version_requirements: !ruby/object:Gem::Version::Requirement
58
- requirements:
59
- - - ">"
60
- - !ruby/object:Gem::Version
61
- version: 0.0.0
62
- version:
54
+ dependencies: []
55
+