text_alignment 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +22 -0
- data/README.md +27 -0
- data/lib/text_alignment.rb +1 -0
- data/lib/text_alignment/approximate_fit.rb +61 -0
- data/lib/text_alignment/find_divisions.rb +117 -0
- data/lib/text_alignment/glcs_alignment.rb +311 -0
- data/lib/text_alignment/glcs_alignment_fast.rb +114 -0
- data/lib/text_alignment/glcs_required.rb +68 -0
- data/lib/text_alignment/lcs_alignment.rb +146 -0
- data/lib/text_alignment/lcs_cdiff.rb +61 -0
- data/lib/text_alignment/lcs_comparison.rb +63 -0
- data/lib/text_alignment/lcs_min.rb +160 -0
- data/lib/text_alignment/mappings.rb +75 -0
- data/lib/text_alignment/text_alignment.rb +223 -0
- data/lib/text_alignment/version.rb +3 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/text_alignment/glcs_alignment_spec.rb +302 -0
- data/spec/text_alignment/lcs_alignment_spec.rb +98 -0
- data/spec/text_alignment/lcs_comparision_spec.rb +322 -0
- data/spec/text_alignment/text_alignment_spec.rb +302 -0
- data/text_alignment.gemspec +22 -0
- metadata +108 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'diff-lcs'
|
3
|
+
require 'text_alignment/lcs_min'
|
4
|
+
require 'text_alignment/find_divisions'
|
5
|
+
require 'text_alignment/lcs_comparison'
|
6
|
+
require 'text_alignment/lcs_alignment'
|
7
|
+
require 'text_alignment/glcs_alignment'
|
8
|
+
require 'text_alignment/mappings'
|
9
|
+
|
10
|
+
module TextAlignment; end unless defined? TextAlignment

TextAlignment::SIGNATURE_NGRAM = 5 unless defined? TextAlignment::SIGNATURE_NGRAM

# Builds position maps from str1 to str2 by walking an sdiff of the two
# strings. Stretches where both strings differ are either recorded as a single
# mapped element or delegated to TextAlignment::GLCSAlignment together with the
# given mappings.
class TextAlignment::GLCSTextAlignment
  # Hashes, sorted by key, from a position in str1 to a position in str2
  # (or to nil for positions inside a replaced stretch).
  attr_reader :position_map_begin, :position_map_end

  # Pairs of [str1 part, str2 part] collected while walking the sdiff.
  attr_reader :common_elements, :mapped_elements

  # NOTE(review): nothing in this class assigns the instance variables behind
  # the readers below, so they currently always return nil.
  attr_reader :similarity
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final

  # @param str1 [String] source string
  # @param str2 [String] target string
  # @param mappings [Array] mappings handed on to TextAlignment::GLCSAlignment
  # @param lcs [Object, nil] accepted for interface compatibility; not used here
  # @param sdiff [Array, nil] precomputed sdiff whose entries respond to
  #   action, old_position and new_position; computed with
  #   TextAlignment::LCSMin when nil
  # @raise [ArgumentError] if str1, str2 or mappings is nil
  def initialize(str1, str2, mappings = [], lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise ArgumentError, "nil mappings" if mappings.nil?

    # The original passed the misspelled local `mapptings`, which raised
    # NameError on every construction.
    _glcs_alignment_fast(str1, str2, mappings, lcs, sdiff)
  end

  private

  def _glcs_alignment_fast(str1, str2, mappings, lcs, sdiff)
    sdiff = TextAlignment::LCSMin.new(str1, str2).sdiff if sdiff.nil?

    posmap_begin, posmap_end = {}, {}
    @common_elements, @mapped_elements = [], []

    # Positions of str2-only (addition) and str1-only (deletion) characters
    # seen since the last common character.
    addition, deletion = [], []

    sdiff.each do |h|
      case h.action
      when '='
        p1, p2 = h.old_position, h.new_position

        @common_elements << [str1[p1], str2[p2]]
        posmap_begin[p1], posmap_end[p1] = p2, p2

        if !addition.empty? && deletion.empty?
          # Pure insertion: an end position should stop before the inserted text.
          posmap_end[p1] = p2 - addition.length unless p1 == 0
        elsif addition.empty? && !deletion.empty?
          # Pure deletion: deleted positions collapse onto the next common one.
          deletion.each { |p| posmap_begin[p], posmap_end[p] = p2, p2 }
        elsif !addition.empty? && !deletion.empty?
          if addition.length > 1 || deletion.length > 1
            galign = TextAlignment::GLCSAlignment.new(str1[deletion[0]..deletion[-1]], str2[addition[0]..addition[-1]], mappings)
            # The sub-alignment is relative to the stretch; shift it back.
            galign.position_map_begin.each { |k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
            galign.position_map_end.each { |k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
            # Restore the anchor in case the sub-alignment overwrote it.
            posmap_begin[p1], posmap_end[p1] = p2, p2
            @common_elements += galign.common_elements
            @mapped_elements += galign.mapped_elements
          else
            posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
            deletion[1..-1].each { |p| posmap_begin[p], posmap_end[p] = nil, nil }
            @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
          end
        end

        addition.clear
        deletion.clear
      when '!'
        deletion << h.old_position
        addition << h.new_position
      when '-'
        deletion << h.old_position
      when '+'
        addition << h.new_position
      end
    end

    # Anchor the end-of-string positions and resolve any trailing stretch.
    p1, p2 = str1.length, str2.length
    posmap_begin[p1], posmap_end[p1] = p2, p2

    if !addition.empty? && deletion.empty?
      posmap_end[p1] = p2 - addition.length unless p1 == 0
    elsif addition.empty? && !deletion.empty?
      deletion.each { |p| posmap_begin[p], posmap_end[p] = p2, p2 }
    elsif !addition.empty? && !deletion.empty?
      # NOTE(review): this trailing case tests `&&` and files the sub-alignment's
      # common elements under mapped_elements, whereas the in-loop case tests
      # `||` and keeps them separate. Left as found; confirm which is intended.
      if addition.length > 1 && deletion.length > 1
        galign = TextAlignment::GLCSAlignment.new(str1[deletion[0]..deletion[-1]], str2[addition[0]..addition[-1]], mappings)
        galign.position_map_begin.each { |k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
        galign.position_map_end.each { |k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
        posmap_begin[p1], posmap_end[p1] = p2, p2
        @mapped_elements += galign.common_elements + galign.mapped_elements
      else
        posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
        deletion[1..-1].each { |p| posmap_begin[p], posmap_end[p] = nil, nil }
        @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
      end
    end

    @position_map_begin = posmap_begin.sort.to_h
    @position_map_end = posmap_end.sort.to_h
  end
end
|
101
|
+
|
102
|
+
# Manual smoke test: run this file directly to print the common and mapped
# elements of a small example.
if __FILE__ == $0
  str1 = '-βκ-'
  str2 = '-betakappa-'

  # anns1 = JSON.parse File.read(ARGV[0]), :symbolize_names => true
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  # Minimal hand-written mapping, kept as an alternative to the full table.
  dictionary = [["β", "beta"]]
  # align = TextAlignment::GLCSTextAlignment.new(str1, str2, dictionary)

  # The demo previously instantiated TextAlignment::TextAlignment, which this
  # file neither defines nor requires; exercise the class defined here instead.
  align = TextAlignment::GLCSTextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
  p align.common_elements
  p align.mapped_elements
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module TextAlignment; end unless defined? TextAlignment

class << TextAlignment
  # Applies the one-character-to-one-character mappings to str1 and then looks
  # for two consecutive non-ASCII characters in the result.
  #
  # str1 is modified in place by the character substitution, exactly as the
  # previous tr!-based implementation did.
  #
  # @param str1 [String] the string to inspect (mutated)
  # @param mappings [Array<Array(String, String)>] [from, to] pairs; only
  #   pairs where both sides are a single character are applied
  # @return [String, nil] the first pair of adjacent non-ASCII characters that
  #   remains, or nil when there is none
  # @raise [ArgumentError] if str1 or mappings is nil
  def glcs_required?(str1, mappings = [])
    raise ArgumentError, "nil string" if str1.nil?
    raise ArgumentError, "nil mappings" if mappings.nil?

    # Character mappings can be safely applied to the string without changing
    # the position of other characters.
    table = mappings.each_with_object({}) do |m, h|
      h[m[0]] = m[1] if m[0].length == 1 && m[1].length == 1
    end

    # A lookup table avoids String#tr's special handling of '^', '-' and '\',
    # which misread mappings whose source was one of those characters.
    str1.gsub!(Regexp.union(table.keys), table) unless table.empty?

    str1[/[^\p{ASCII}]{2}/]
  end
end
|
21
|
+
|
22
|
+
# Manual smoke test: run this file directly, optionally with a path to a text
# file, to print the result of glcs_required? for that text.
if __FILE__ == $0
  # The whitespace entries are written as \u escapes so the intended code
  # points stay visible and survive editors that normalise whitespace.
  dictionary = [
    ["×", "x"],         # U+00D7 (multiplication sign)
    ["•", "*"],         # U+2022 (bullet)
    ["Δ", "delta"],     # U+0394 (greek capital letter delta)
    ["Φ", "phi"],       # U+03A6 (greek capital letter phi)
    ["α", "alpha"],     # U+03B1 (greek small letter alpha)
    ["β", "beta"],      # U+03B2 (greek small letter beta)
    ["γ", "gamma"],     # U+03B3 (greek small letter gamma)
    ["δ", "delta"],     # U+03B4 (greek small letter delta)
    ["ε", "epsilon"],   # U+03B5 (greek small letter epsilon)
    ["κ", "kappa"],     # U+03BA (greek small letter kappa)
    ["λ", "lambda"],    # U+03BB (greek small letter lambda)
    ["μ", "mu"],        # U+03BC (greek small letter mu)
    ["χ", "chi"],       # U+03C7 (greek small letter chi)
    ["ϕ", "phi"],       # U+03D5 (greek phi symbol)
    ["\u2009", " "],    # U+2009 (thin space)
    ["\u200A", " "],    # U+200A (hair space)
    ["\u00A0", " "],    # U+00A0 (no-break space)
    ["\u3000", " "],    # U+3000 (ideographic space)
    ["−", "-"],         # U+2212 (minus sign)
    ["–", "-"],         # U+2013 (en dash)
    ["′", "'"],         # U+2032 (prime)
    ["‘", "'"],         # U+2018 (left single quotation mark)
    ["’", "'"],         # U+2019 (right single quotation mark)
    ["“", '"'],         # U+201C (left double quotation mark)
    ["”", '"']          # U+201D (right double quotation mark)
  ]

  str = "TGF-β–induced"

  # from_text = "TGF-beta-induced"
  # to_text = "TGF-β–induced"

  # from_text = "TGF-β–β induced"
  # to_text = "TGF-beta-beta induced"

  # str = "-βκ-"

  if ARGV.length == 1
    str = File.read(ARGV[0])
  end
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  p TextAlignment.glcs_required?(str, dictionary)
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'text_alignment/lcs_min'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

# Derives position maps from str1 to str2 out of an sdiff of the two strings.
class TextAlignment::LCSAlignment
  # Hashes, sorted by key, from a position in str1 to a position in str2
  # (or to nil for the non-leading positions of a replaced stretch).
  attr_reader :position_map_begin, :position_map_end

  # common_elements: [str1 char, str2 char] for every '=' entry of the sdiff.
  # mapped_elements: [str1 stretch, str2 stretch] for every replaced stretch.
  attr_reader :common_elements, :mapped_elements

  # Computes the position maps for the two strings, str1 and str2.
  #
  # @param str1 [String] source string
  # @param str2 [String] target string
  # @param lcs [Object, nil] accepted for interface compatibility; not used here
  # @param sdiff [Array, nil] precomputed sdiff whose entries respond to
  #   action, old_position and new_position; computed with
  #   TextAlignment::LCSMin when nil
  # @raise [ArgumentError] if str1 or str2 is nil
  def initialize(str1, str2, lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?

    sdiff = TextAlignment::LCSMin.new(str1, str2).sdiff if sdiff.nil?
    _compute_position_map(str1, str2, sdiff)
  end

  private

  def _compute_position_map(str1, str2, sdiff)
    @position_map_begin, @position_map_end = {}, {}
    @common_elements, @mapped_elements = [], []

    # Positions of str2-only (addition) and str1-only (deletion) characters
    # seen since the last common character.
    addition, deletion = [], []

    sdiff.each do |h|
      case h.action
      when '='
        @common_elements << [str1[h.old_position], str2[h.new_position]]
        _anchor(str1, str2, h.old_position, h.new_position, deletion, addition)
        addition.clear
        deletion.clear
      when '!'
        deletion << h.old_position
        addition << h.new_position
      when '-'
        deletion << h.old_position
      when '+'
        addition << h.new_position
      end
    end

    # The end-of-string positions act as a final anchor for a trailing stretch.
    _anchor(str1, str2, str1.length, str2.length, deletion, addition)

    @position_map_begin = @position_map_begin.sort.to_h
    @position_map_end = @position_map_end.sort.to_h
  end

  # Maps position p1 of str1 onto p2 of str2 and resolves the stretch of
  # pending additions/deletions that precedes this anchor.
  def _anchor(str1, str2, p1, p2, deletion, addition)
    @position_map_begin[p1] = p2
    @position_map_end[p1] = p2
    return if addition.empty? && deletion.empty?

    if deletion.empty?
      # Pure insertion: correct the position for end so that it stops before
      # the inserted text.
      @position_map_end[p1] = p2 - addition.length unless p1 == 0
    elsif addition.empty?
      # Pure deletion: deleted positions collapse onto the anchor.
      deletion.each { |p| @position_map_begin[p] = @position_map_end[p] = p2 }
    else
      # Replacement: the first deleted position maps to the first added one,
      # the remaining deleted positions have no counterpart.
      @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
      @position_map_begin[deletion[0]] = @position_map_end[deletion[0]] = addition[0]
      deletion[1..-1].each { |p| @position_map_begin[p] = @position_map_end[p] = nil }
    end
  end
end
|
76
|
+
|
77
|
+
# Manual smoke test: run this file directly to print both position maps for
# the example pair selected below. Alternative pairs are kept as comments.
if __FILE__ == $0

  # from_text = "TGF-β mRNA"
  # to_text = "TGF-beta mRNA"

  # from_text = "TGF-beta mRNA"
  # to_text = "TGF-β mRNA"

  # from_text = "TGF-beta mRNA"
  # to_text = "TGF- mRNA"

  # from_text = "TGF-β–induced"
  # to_text = "TGF-beta-induced"

  from_text = 'abxyzcd'
  to_text = 'abcd'

  # from_text = "TGF-beta-induced"
  # to_text = "TGF-β–induced"

  # from_text = "beta-induced"
  # to_text = "TGF-beta-induced"

  # from_text = "TGF-beta-induced"
  # to_text = "beta-induced"

  # from_text = "TGF-β–β induced"
  # to_text = "TGF-beta-beta induced"

  # from_text = "-βκ-"
  # to_text = "-betakappa-"

  # from_text = "-betakappa-beta-z"
  # to_text = "-βκ-β–z"

  # from_text = "affect C/EBP-β’s ability"
  # to_text = "affect C/EBP-beta's ability"

  # from_text = "12 ± 34"
  # to_text = "12 +/- 34"

  # from_text = "TGF-β–treated"
  # to_text = "TGF-beta-treated"

  # from_text = "in TGF-β–treated cells"
  # to_text = "in TGF-beta-treated cells"

  # from_text = "TGF-β–induced"
  # to_text = "TGF-beta-induced"

  # anns1 = JSON.parse File.read(ARGV[0]), :symbolize_names => true
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  # aligner = TextAlignment.new(anns1[:text], anns2[:text], [["Δ", "delta"], [" ", " "], ["–", "-"], ["′", "'"]])
  # denotations = aligner.transform_denotations(anns1[:denotations])

  # Sample annotation; only used by the commented-out experiments.
  denotations_s = <<-'ANN'
[{"id":"T0", "span":{"begin":1,"end":2}, "category":"Protein"}]
  ANN

  # denotations = JSON.parse denotations_s, :symbolize_names => true

  alignment = TextAlignment::LCSAlignment.new(from_text, to_text)
  p alignment.position_map_begin
  puts "-----"
  p alignment.position_map_end
  # aligner = TextAlignment.new(from_text, to_text, [["Δ", "delta"], [" ", " "], ["–", "-"], ["′", "'"], ["β", "beta"]])

  # p denotations
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'diff-lcs'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

module TextAlignment
  # Placeholder printed where one string has no counterpart for a character
  # of the other.
  NIL_CHARACTER = '_'
end

class << TextAlignment

  # Renders a character-level diff of two strings as two aligned lines.
  #
  # @param str1 [String]
  # @param str2 [String]
  # @return [String] the two aligned rows joined by a newline
  # @raise [ArgumentError] if either string is nil
  # @raise [RuntimeError] if either string contains NIL_CHARACTER, since the
  #   output would then be ambiguous
  def cdiff(str1, str2)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise "a nil character appears in the input string" if str1.index(TextAlignment::NIL_CHARACTER) || str2.index(TextAlignment::NIL_CHARACTER)

    sdiff2cdiff(Diff::LCS.sdiff(str1, str2))
  end

  # Converts an sdiff into the two-row textual form.
  #
  # @param sdiff [Enumerable] entries responding to action, old_element and
  #   new_element
  # @return [String] row for the old string, a newline, row for the new
  #   string; newlines inside either row are shown as spaces
  # @raise [ArgumentError] if sdiff is nil
  def sdiff2cdiff(sdiff)
    raise ArgumentError, "nil sdiff" if sdiff.nil?

    gap = TextAlignment::NIL_CHARACTER
    # Rows are built with << so that long inputs do not allocate a new string
    # per diff entry, as += did.
    row1, row2 = ''.dup, ''.dup

    sdiff.each do |h|
      case h.action
      when '='
        row1 << h.old_element
        row2 << h.new_element
      when '!'
        # A replacement takes two columns: old over a gap, then a gap over new.
        row1 << h.old_element << gap
        row2 << gap << h.new_element
      when '-'
        row1 << h.old_element
        row2 << gap
      when '+'
        row1 << gap
        row2 << h.new_element
      end
    end

    row1.tr("\n", ' ') + "\n" + row2.tr("\n", ' ')
  end

end
|
44
|
+
|
45
|
+
# Manual smoke test: prints the cdiff of two built-in strings, or of the
# "text" fields of two JSON files given on the command line.
if __FILE__ == $0
  require 'json'

  str1 = 'abcde'
  str2 = 'naxbyzabcdexydzem'

  if ARGV.length == 2
    first_path, second_path = ARGV
    str1 = JSON.parse(File.read(first_path).strip)["text"]
    str2 = JSON.parse(File.read(second_path).strip)["text"]
  end

  separator = "-----"
  puts "string 1: #{str1}"
  puts separator
  puts "string 2: #{str2}"
  puts separator
  puts "[cdiff]"
  puts TextAlignment.cdiff(str1, str2)
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'text_alignment/lcs_min'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

# Measures how similar two strings are within the span between their first
# and last common characters.
class TextAlignment::LCSComparison
  # The similarity ratio of the given two strings after stripping unmatched prefixes and suffixes
  attr_reader :similarity

  # The initial and final matching positions of str1 and str2
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final

  # @param str1 [String]
  # @param str2 [String]
  # @param lcs [Integer, nil] length of the longest common subsequence;
  #   computed with TextAlignment::LCSMin when nil
  # @param sdiff [Array, nil] sdiff of the two strings whose entries respond
  #   to action, old_position and new_position; computed when nil
  # @raise [ArgumentError] if str1 or str2 is nil
  def initialize(str1, str2, lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?

    @str1, @str2 = str1, str2
    _lcs_comparison(str1, str2, lcs, sdiff)
  end

  private

  def _lcs_comparison(str1, str2, lcs = nil, sdiff = nil)
    # Previously only a missing lcs triggered the computation, so passing lcs
    # without sdiff crashed on nil below. A missing sdiff is now computed as
    # well; a given lcs is kept as is.
    if lcs.nil? || sdiff.nil?
      lcsmin = TextAlignment::LCSMin.new(str1, str2)
      sdiff = lcsmin.sdiff
      lcs = lcsmin.lcs if lcs.nil?
    end

    if lcs > 0
      match_initial = sdiff.index { |d| d.action == '=' }
      match_final = sdiff.rindex { |d| d.action == '=' }

      @str1_match_initial = sdiff[match_initial].old_position
      @str2_match_initial = sdiff[match_initial].new_position
      @str1_match_final = sdiff[match_final].old_position
      @str2_match_final = sdiff[match_final].new_position

      # 2 * lcs over the summed lengths of the two matched spans.
      span1 = @str1_match_final - @str1_match_initial + 1
      span2 = @str2_match_final - @str2_match_initial + 1
      @similarity = 2 * lcs / (span1 + span2).to_f
    else
      # Nothing in common: report an empty match at the start of both strings.
      @str1_match_initial = 0
      @str2_match_initial = 0
      @str1_match_final = 0
      @str2_match_final = 0
      @similarity = 0
    end
  end
end
|
46
|
+
|
47
|
+
# Manual smoke test: compares two built-in strings, or the "text" fields of
# two JSON files given on the command line, and prints the matched spans.
if __FILE__ == $0
  require 'json'

  str1 = 'naxbyzabcdexydzem'
  str2 = 'abcde'

  if ARGV.length == 2
    first_path, second_path = ARGV
    str1 = JSON.parse(File.read(first_path).strip)["text"]
    str2 = JSON.parse(File.read(second_path).strip)["text"]
  end

  comparison = TextAlignment::LCSComparison.new(str1, str2)
  separator = "-----"

  puts "Similarity: #{comparison.similarity}"
  puts "String 1 match: (#{comparison.str1_match_initial}, #{comparison.str1_match_final})"
  puts "String 2 match: (#{comparison.str2_match_initial}, #{comparison.str2_match_final})"
  puts separator
  puts '[' + str1[comparison.str1_match_initial..comparison.str1_match_final] + ']'
  puts separator
  puts '[' + str2[comparison.str2_match_initial..comparison.str2_match_final] + ']'
end
|