text_alignment 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +22 -0
- data/README.md +27 -0
- data/lib/text_alignment.rb +1 -0
- data/lib/text_alignment/approximate_fit.rb +61 -0
- data/lib/text_alignment/find_divisions.rb +117 -0
- data/lib/text_alignment/glcs_alignment.rb +311 -0
- data/lib/text_alignment/glcs_alignment_fast.rb +114 -0
- data/lib/text_alignment/glcs_required.rb +68 -0
- data/lib/text_alignment/lcs_alignment.rb +146 -0
- data/lib/text_alignment/lcs_cdiff.rb +61 -0
- data/lib/text_alignment/lcs_comparison.rb +63 -0
- data/lib/text_alignment/lcs_min.rb +160 -0
- data/lib/text_alignment/mappings.rb +75 -0
- data/lib/text_alignment/text_alignment.rb +223 -0
- data/lib/text_alignment/version.rb +3 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/text_alignment/glcs_alignment_spec.rb +302 -0
- data/spec/text_alignment/lcs_alignment_spec.rb +98 -0
- data/spec/text_alignment/lcs_comparision_spec.rb +322 -0
- data/spec/text_alignment/text_alignment_spec.rb +302 -0
- data/text_alignment.gemspec +22 -0
- metadata +108 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'diff-lcs'
|
3
|
+
require 'text_alignment/lcs_min'
|
4
|
+
require 'text_alignment/find_divisions'
|
5
|
+
require 'text_alignment/lcs_comparison'
|
6
|
+
require 'text_alignment/lcs_alignment'
|
7
|
+
require 'text_alignment/glcs_alignment'
|
8
|
+
require 'text_alignment/mappings'
|
9
|
+
|
10
|
+
module TextAlignment; end unless defined? TextAlignment

TextAlignment::SIGNATURE_NGRAM = 5 unless defined? TextAlignment::SIGNATURE_NGRAM

# Builds position maps from character offsets of str1 to character offsets of
# str2 by walking an sdiff of the two strings. Runs of matching characters are
# mapped directly; a gap in which both strings differ by more than one
# character is delegated to TextAlignment::GLCSAlignment together with the
# given mappings.
class TextAlignment::GLCSTextAlignment
  # Hashes (sorted by key) from a str1 offset to the str2 offset used when the
  # str1 offset is the beginning / the end of a span. A value may be nil.
  attr_reader :position_map_begin, :position_map_end

  # Pairs of [str1 fragment, str2 fragment]: characters the sdiff reports as
  # equal, and fragments that were aligned although they differ.
  attr_reader :common_elements, :mapped_elements

  # NOTE(review): nothing in this class assigns the instance variables behind
  # the readers below, so they return nil here - confirm whether they are
  # still needed.
  attr_reader :similarity
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final

  # str1, str2 - the strings to align.
  # mappings   - mapping table handed to TextAlignment::GLCSAlignment.
  # lcs        - accepted but not used by this class.
  # sdiff      - a precomputed sdiff: a sequence of changes responding to
  #              action, old_position and new_position. When nil it is
  #              obtained from TextAlignment::LCSMin.new(str1, str2).sdiff.
  #
  # Raises ArgumentError when str1, str2 or mappings is nil.
  def initialize(str1, str2, mappings = [], lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise ArgumentError, "nil mappings" if mappings.nil?

    # The original passed the misspelled (undefined) local `mapptings`, which
    # made every instantiation fail with NameError.
    _glcs_alignment_fast(str1, str2, mappings, lcs, sdiff)
  end

  private

  # Walks the sdiff, collecting the str1 offsets of pending deletions and the
  # str2 offsets of pending additions, and resolves them each time a matching
  # character ('=') is reached and once more at the end of the strings.
  def _glcs_alignment_fast(str1, str2, mappings, lcs, sdiff)
    sdiff = TextAlignment::LCSMin.new(str1, str2).sdiff if sdiff.nil?

    posmap_begin, posmap_end = {}, {}
    @common_elements, @mapped_elements = [], []

    addition, deletion = [], []

    sdiff.each do |h|
      case h.action
      when '='
        p1, p2 = h.old_position, h.new_position

        @common_elements << [str1[p1], str2[p2]]
        posmap_begin[p1], posmap_end[p1] = p2, p2

        if !addition.empty? && deletion.empty?
          # Only str2 gained characters: a span ending at p1 ends before them.
          posmap_end[p1] = p2 - addition.length unless p1 == 0
        elsif addition.empty? && !deletion.empty?
          # Only str1 had extra characters: they all collapse onto p2.
          deletion.each { |p| posmap_begin[p], posmap_end[p] = p2, p2 }
        elsif !addition.empty? && !deletion.empty?
          if addition.length > 1 || deletion.length > 1
            # Align the two differing fragments on their own, then shift the
            # fragment-relative offsets back into whole-string offsets.
            galign = TextAlignment::GLCSAlignment.new(str1[deletion[0]..deletion[-1]], str2[addition[0]..addition[-1]], mappings)
            galign.position_map_begin.each { |k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
            galign.position_map_end.each { |k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
            # The fragment alignment also writes an entry for the fragment's
            # end offset, which is p1 when the deletions are contiguous;
            # restore the direct mapping for p1.
            posmap_begin[p1], posmap_end[p1] = p2, p2
            @common_elements += galign.common_elements
            @mapped_elements += galign.mapped_elements
          else
            posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
            deletion[1..-1].each { |p| posmap_begin[p], posmap_end[p] = nil, nil }
            @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
          end
        end

        addition.clear
        deletion.clear

      when '!'
        deletion << h.old_position
        addition << h.new_position
      when '-'
        deletion << h.old_position
      when '+'
        addition << h.new_position
      end
    end

    # Resolve whatever is still pending against the end-of-string offsets.
    p1, p2 = str1.length, str2.length
    posmap_begin[p1], posmap_end[p1] = p2, p2

    if !addition.empty? && deletion.empty?
      posmap_end[p1] = p2 - addition.length unless p1 == 0
    elsif addition.empty? && !deletion.empty?
      deletion.each { |p| posmap_begin[p], posmap_end[p] = p2, p2 }
    elsif !addition.empty? && !deletion.empty?
      # NOTE(review): the loop above uses `||` for this test and keeps the
      # fragment's common elements in @common_elements, while this tail uses
      # `&&` and folds them into @mapped_elements. Left as found - confirm
      # which behaviour is intended.
      if addition.length > 1 && deletion.length > 1
        galign = TextAlignment::GLCSAlignment.new(str1[deletion[0]..deletion[-1]], str2[addition[0]..addition[-1]], mappings)
        galign.position_map_begin.each { |k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
        galign.position_map_end.each { |k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0] }
        posmap_begin[p1], posmap_end[p1] = p2, p2
        @mapped_elements += galign.common_elements + galign.mapped_elements
      else
        posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0]
        deletion[1..-1].each { |p| posmap_begin[p], posmap_end[p] = nil, nil }
        @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]
      end
    end

    @position_map_begin = posmap_begin.sort.to_h
    @position_map_end = posmap_end.sort.to_h
  end
end
|
101
|
+
|
102
|
+
# Manual smoke test, run only when this file is executed directly.
if __FILE__ == $0
  str1 = '-βκ-'
  str2 = '-betakappa-'

  # anns1 = JSON.parse File.read(ARGV[0]), :symbolize_names => true
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  # Exercise the class defined in this file. The previous version instantiated
  # TextAlignment::TextAlignment, which this file neither defines nor requires,
  # and built a local `dictionary` that was never used.
  align = TextAlignment::GLCSTextAlignment.new(str1, str2, TextAlignment::MAPPINGS)
  p align.common_elements
  p align.mapped_elements
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module TextAlignment; end unless defined? TextAlignment

class << TextAlignment
  # Applies the single-character mappings to a copy of str1 and looks for two
  # consecutive non-ASCII characters in the result.
  #
  # str1     - the string to inspect; it is not modified.
  # mappings - pairs of [from, to] strings. Only pairs in which both strings
  #            are exactly one character long are applied; the rest are
  #            ignored here.
  #
  # Returns the first pair of adjacent non-ASCII characters as a String, or
  # nil when there is none.
  # Raises ArgumentError when str1 or mappings is nil.
  def glcs_required?(str1, mappings = [])
    raise ArgumentError, "nil string" if str1.nil?
    raise ArgumentError, "nil mappings" if mappings.nil?

    # Character-to-character mappings can be applied safely without changing
    # the position of any other character. A lookup table is used instead of
    # String#tr so that '-', '^' and '\' in a mapping are taken literally.
    # When a character is mapped more than once, the last mapping wins.
    substitutions = {}
    mappings.each do |m|
      substitutions[m[0]] = m[1] if m[0].length == 1 && m[1].length == 1
    end

    # Work on a copy: the previous tr! rewrote the caller's string in place
    # and raised on frozen input.
    normalized = str1.each_char.map { |c| substitutions.fetch(c, c) }.join

    normalized[/[^\p{ASCII}]{2}/]
  end
end
|
21
|
+
|
22
|
+
# Manual smoke test, run only when this file is executed directly.
# Usage: ruby glcs_required.rb [file] - when a file is given its content is
# inspected instead of the built-in sample string.
if __FILE__ == $0

  # Sample mapping table of [character, ASCII replacement] pairs. The
  # whitespace characters are written as \u escapes so that they stay
  # distinguishable from a plain space and survive whitespace normalisation.
  dictionary = [
    ["×", "x"],        #U+00D7 (multiplication sign)
    ["•", "*"],        #U+2022 (bullet)
    ["Δ", "delta"],    #U+0394 (greek capital letter delta)
    ["Φ", "phi"],      #U+03A6 (greek capital letter phi)
    ["α", "alpha"],    #U+03B1 (greek small letter alpha)
    ["β", "beta"],     #U+03B2 (greek small letter beta)
    ["γ", "gamma"],    #U+03B3 (greek small letter gamma)
    ["δ", "delta"],    #U+03B4 (greek small letter delta)
    ["ε", "epsilon"],  #U+03B5 (greek small letter epsilon)
    ["κ", "kappa"],    #U+03BA (greek small letter kappa)
    ["λ", "lambda"],   #U+03BB (greek small letter lambda)
    ["μ", "mu"],       #U+03BC (greek small letter mu)
    ["χ", "chi"],      #U+03C7 (greek small letter chi)
    ["ϕ", "phi"],      #U+03D5 (greek phi symbol)
    ["\u2009", " "],   #U+2009 (thin space)
    ["\u200A", " "],   #U+200A (hair space)
    ["\u00A0", " "],   #U+00A0 (no-break space)
    ["\u3000", " "],   #U+3000 (ideographic space)
    ["−", "-"],        #U+2212 (minus sign)
    ["–", "-"],        #U+2013 (en dash)
    ["′", "'"],        #U+2032 (prime)
    ["‘", "'"],        #U+2018 (left single quotation mark)
    ["’", "'"],        #U+2019 (right single quotation mark)
    ["“", '"'],        #U+201C (left double quotation mark)
    ["”", '"']         #U+201D (right double quotation mark)
  ]

  str = "TGF-β–induced"

  # from_text = "TGF-beta-induced"
  # to_text = "TGF-β–induced"

  # from_text = "TGF-β–β induced"
  # to_text = "TGF-beta-beta induced"

  # str = "-βκ-"

  str = File.read(ARGV[0]) if ARGV.length == 1
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  p TextAlignment.glcs_required?(str, dictionary)
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'text_alignment/lcs_min'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

# Builds position maps from character offsets of str1 to character offsets of
# str2 by walking an sdiff of the two strings.
class TextAlignment::LCSAlignment
  # Hashes (sorted by key) from a str1 offset to the str2 offset used when the
  # str1 offset is the beginning / the end of a span. A value may be nil.
  attr_reader :position_map_begin, :position_map_end

  # Pairs of [str1 fragment, str2 fragment]: characters the sdiff reports as
  # equal, and fragments that replace one another.
  attr_reader :common_elements, :mapped_elements

  # Computes the position maps for the given two strings, str1 and str2.
  #
  # lcs   - accepted but not used by this class.
  # sdiff - a precomputed sdiff: a sequence of changes responding to action,
  #         old_position and new_position. When nil it is obtained from
  #         TextAlignment::LCSMin.new(str1, str2).sdiff.
  #
  # Raises ArgumentError when nil is passed for either str1 or str2.
  def initialize(str1, str2, lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?

    sdiff = TextAlignment::LCSMin.new(str1, str2).sdiff if sdiff.nil?
    _compute_position_map(str1, str2, sdiff)
  end

  private

  # Walks the sdiff, collecting the str1 offsets of pending deletions and the
  # str2 offsets of pending additions, and resolves them each time a matching
  # character ('=') is reached and once more at the end of the strings.
  def _compute_position_map(str1, str2, sdiff)
    @position_map_begin, @position_map_end = {}, {}
    @common_elements, @mapped_elements = [], []

    addition, deletion = [], []

    sdiff.each do |h|
      case h.action
      when '='
        p1, p2 = h.old_position, h.new_position
        @common_elements << [str1[p1], str2[p2]]
        _resolve_gap(str1, str2, p1, p2, addition, deletion)
      when '!'
        deletion << h.old_position
        addition << h.new_position
      when '-'
        deletion << h.old_position
      when '+'
        addition << h.new_position
      end
    end

    # The end-of-string offsets act as a final anchor for whatever is pending.
    _resolve_gap(str1, str2, str1.length, str2.length, addition, deletion)

    @position_map_begin = @position_map_begin.sort.to_h
    @position_map_end = @position_map_end.sort.to_h
  end

  # Maps the anchor p1 => p2, settles the pending additions/deletions that
  # precede it, and empties both lists.
  def _resolve_gap(str1, str2, p1, p2, addition, deletion)
    @position_map_begin[p1] = @position_map_end[p1] = p2

    if deletion.empty?
      # Only str2 gained characters: a span ending at p1 ends before them.
      @position_map_end[p1] = p2 - addition.length unless addition.empty? || p1 == 0
    elsif addition.empty?
      # Only str1 had extra characters: they all collapse onto p2.
      deletion.each { |p| @position_map_begin[p] = @position_map_end[p] = p2 }
    else
      # A replacement: the first deleted offset maps to the first added one,
      # the remaining deleted offsets have no counterpart.
      @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]]

      @position_map_begin[deletion[0]] = @position_map_end[deletion[0]] = addition[0]
      deletion[1..-1].each { |p| @position_map_begin[p] = @position_map_end[p] = nil }
    end

    addition.clear
    deletion.clear
  end
end
|
76
|
+
|
77
|
+
# Manual smoke test, run only when this file is executed directly: prints the
# begin and end position maps for one sample pair of strings.
if __FILE__ == $0

  # Other sample pairs that can be swapped in for from_text / to_text:
  #
  #   "TGF-β mRNA"               / "TGF-beta mRNA"
  #   "TGF-beta mRNA"            / "TGF-β mRNA"
  #   "TGF-beta mRNA"            / "TGF- mRNA"
  #   "TGF-β–induced"            / "TGF-beta-induced"
  #   "TGF-beta-induced"         / "TGF-β–induced"
  #   "beta-induced"             / "TGF-beta-induced"
  #   "TGF-beta-induced"         / "beta-induced"
  #   "TGF-β–β induced"          / "TGF-beta-beta induced"
  #   "-βκ-"                     / "-betakappa-"
  #   "-betakappa-beta-z"        / "-βκ-β–z"
  #   "affect C/EBP-β’s ability" / "affect C/EBP-beta's ability"
  #   "12 ± 34"                  / "12 +/- 34"
  #   "TGF-β–treated"            / "TGF-beta-treated"
  #   "in TGF-β–treated cells"   / "in TGF-beta-treated cells"

  from_text, to_text = 'abxyzcd', 'abcd'

  # anns1 = JSON.parse File.read(ARGV[0]), :symbolize_names => true
  # anns2 = JSON.parse File.read(ARGV[1]), :symbolize_names => true

  # aligner = TextAlignment.new(anns1[:text], anns2[:text], [["Δ", "delta"], [" ", " "], ["–", "-"], ["′", "'"]])
  # denotations = aligner.transform_denotations(anns1[:denotations])

  # Sample annotation kept for the commented-out denotation experiments.
  denotations_s = <<-'ANN'
[{"id":"T0", "span":{"begin":1,"end":2}, "category":"Protein"}]
  ANN

  # denotations = JSON.parse denotations_s, :symbolize_names => true

  alignment = TextAlignment::LCSAlignment.new(from_text, to_text)
  p alignment.position_map_begin
  puts "-----"
  p alignment.position_map_end
  # aligner = TextAlignment.new(from_text, to_text, [["Δ", "delta"], [" ", " "], ["–", "-"], ["′", "'"], ["β", "beta"]])

  # p denotations
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'diff-lcs'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

module TextAlignment
  # Filler written into a cdiff row where the other string has a character
  # and this one has none.
  NIL_CHARACTER = '_'
end

class << TextAlignment

  # Renders a two-line, character-aligned diff of str1 and str2.
  #
  # Returns the String produced by sdiff2cdiff for Diff::LCS.sdiff(str1, str2).
  # Raises ArgumentError when either string is nil, and RuntimeError when
  # either string already contains TextAlignment::NIL_CHARACTER (the filler
  # would then be ambiguous).
  def cdiff(str1, str2)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    if str1.include?(TextAlignment::NIL_CHARACTER) || str2.include?(TextAlignment::NIL_CHARACTER)
      raise "a nil character appears in the input string"
    end

    sdiff2cdiff(Diff::LCS.sdiff(str1, str2))
  end

  # Converts an sdiff into two aligned rows joined by a newline: the first
  # row carries the old elements, the second the new ones.
  #
  # sdiff - a sequence of changes responding to action, old_element and
  #         new_element.
  #
  # Newlines inside either row are replaced with spaces so that the result is
  # always exactly two lines.
  # Raises ArgumentError when sdiff is nil.
  def sdiff2cdiff(sdiff)
    raise ArgumentError, "nil sdiff" if sdiff.nil?

    filler = TextAlignment::NIL_CHARACTER
    # Unfrozen buffers appended to in place; `+=` would copy both rows on
    # every element.
    row1, row2 = ''.dup, ''.dup

    sdiff.each do |h|
      case h.action
      when '='
        row1 << h.old_element
        row2 << h.new_element
      when '!'
        # A replacement occupies two columns: old over filler, then filler
        # over new.
        row1 << h.old_element << filler
        row2 << filler << h.new_element
      when '-'
        row1 << h.old_element
        row2 << filler
      when '+'
        row1 << filler
        row2 << h.new_element
      end
    end

    row1.tr("\n", ' ') + "\n" + row2.tr("\n", ' ')
  end

end
|
44
|
+
|
45
|
+
# Manual smoke test, run only when this file is executed directly.
# Usage: ruby lcs_cdiff.rb [file1.json file2.json] - with two arguments the
# "text" members of the two JSON files are compared instead of the samples.
if __FILE__ == $0
  require 'json'

  str1, str2 =
    if ARGV.length == 2
      ARGV.map { |path| JSON.parse(File.read(path).strip)["text"] }
    else
      ['abcde', 'naxbyzabcdexydzem']
    end

  separator = "-----"

  puts "string 1: #{str1}"
  puts separator
  puts "string 2: #{str2}"
  puts separator
  puts "[cdiff]"
  puts TextAlignment.cdiff(str1, str2)
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'text_alignment/lcs_min'
|
3
|
+
|
4
|
+
module TextAlignment; end unless defined? TextAlignment

# Compares two strings through their longest common subsequence and records
# where the matching region starts and ends in each of them.
class TextAlignment::LCSComparison
  # The similarity ratio of the given two strings after stripping unmatched prefixes and suffixes
  attr_reader :similarity

  # The initial and final matching positions of str1 and str2
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final

  # str1, str2 - the strings to compare.
  # lcs        - precomputed LCS length; when nil, both lcs and sdiff are
  #              taken from TextAlignment::LCSMin.new(str1, str2).
  # sdiff      - precomputed sdiff: an Array of changes responding to action,
  #              old_position and new_position. Only honoured when lcs is
  #              given; when it is nil and needed, it is computed.
  #
  # Raises ArgumentError when either string is nil.
  def initialize(str1, str2, lcs = nil, sdiff = nil)
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?

    @str1, @str2 = str1, str2
    _lcs_comparison(str1, str2, lcs, sdiff)
  end

  private

  # Sets the match positions and the similarity. Without any common
  # character (lcs == 0) all positions and the similarity are 0.
  def _lcs_comparison(str1, str2, lcs = nil, sdiff = nil)
    if lcs.nil?
      lcsmin = TextAlignment::LCSMin.new(str1, str2)
      lcs = lcsmin.lcs
      sdiff = lcsmin.sdiff
    end

    if lcs > 0
      # A caller may supply lcs without sdiff; this used to fail with
      # NoMethodError on nil, so compute the sdiff on demand.
      sdiff = TextAlignment::LCSMin.new(str1, str2).sdiff if sdiff.nil?

      match_initial = sdiff.index { |d| d.action == '=' }
      match_final = sdiff.rindex { |d| d.action == '=' }

      @str1_match_initial = sdiff[match_initial].old_position
      @str2_match_initial = sdiff[match_initial].new_position
      @str1_match_final = sdiff[match_final].old_position
      @str2_match_final = sdiff[match_final].new_position

      # Twice the LCS length over the summed lengths of the two matched
      # regions (first to last matching character, inclusive).
      str1_span = @str1_match_final - @str1_match_initial + 1
      str2_span = @str2_match_final - @str2_match_initial + 1
      @similarity = 2 * lcs / (str1_span + str2_span).to_f
    else
      @str1_match_initial = 0
      @str2_match_initial = 0
      @str1_match_final = 0
      @str2_match_final = 0
      @similarity = 0
    end
  end
end
|
46
|
+
|
47
|
+
# Manual smoke test, run only when this file is executed directly.
# Usage: ruby lcs_comparison.rb [file1.json file2.json] - with two arguments
# the "text" members of the two JSON files are compared instead of the samples.
if __FILE__ == $0
  require 'json'

  str1, str2 =
    if ARGV.length == 2
      ARGV.map { |path| JSON.parse(File.read(path).strip)["text"] }
    else
      ['naxbyzabcdexydzem', 'abcde']
    end

  comparison = TextAlignment::LCSComparison.new(str1, str2)
  separator = "-----"

  puts "Similarity: #{comparison.similarity}"
  puts "String 1 match: (#{comparison.str1_match_initial}, #{comparison.str1_match_final})"
  puts "String 2 match: (#{comparison.str2_match_initial}, #{comparison.str2_match_final})"

  # Show the matched region of each string between brackets.
  matched1 = str1[comparison.str1_match_initial..comparison.str1_match_final]
  matched2 = str2[comparison.str2_match_initial..comparison.str2_match_final]

  puts separator
  puts '[' + matched1 + ']'
  puts separator
  puts '[' + matched2 + ']'
end
|