text_alignment 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 55b3e5d56a84a2255984ae2769bb7b2d59029ad319b7188c73e8954877a5298f
4
+ data.tar.gz: dfb4aa052caa81ee6812e047f4e4842aacb33790ceb0f2530cec7fc25324b9e7
5
+ SHA512:
6
+ metadata.gz: 737ea63a822ec2fe2cf1d9834a9db4759f8a3e4be25098eb7cb8c5fd37e485515c66c5e9946bfc962d6e0658ec82f762bc099a54172708115f6ff3f744db3b9a
7
+ data.tar.gz: 303298035e87e965c59801a9ee44b4737cbbc1ec892f0d2180a9fc0a4eef33c554f2a6ea77f51ba3043e38994502520e8c101a725a715868e1ed345142dbc9c7
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ doc/*
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source 'https://rubygems.org'
2
+ ruby '2.3.4'
3
+
4
+ # Specify your gem's dependencies in *.gemspec
5
+ #gemspec
6
+
7
+ gem 'ruby-dictionary', '~>1.1', '>=1.1.1'
8
+
9
+ group :test do
10
+ gem 'rspec', '~>3.0'
11
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.2.5)
5
+ rspec (3.0.0)
6
+ rspec-core (~> 3.0.0)
7
+ rspec-expectations (~> 3.0.0)
8
+ rspec-mocks (~> 3.0.0)
9
+ rspec-core (3.0.4)
10
+ rspec-support (~> 3.0.0)
11
+ rspec-expectations (3.0.4)
12
+ diff-lcs (>= 1.2.0, < 2.0)
13
+ rspec-support (~> 3.0.0)
14
+ rspec-mocks (3.0.4)
15
+ rspec-support (~> 3.0.0)
16
+ rspec-support (3.0.4)
17
+ ruby-dictionary (1.1.1)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ rspec (~> 3.0)
24
+ ruby-dictionary (~> 1.1, >= 1.1.1)
25
+
26
+ RUBY VERSION
27
+ ruby 2.3.4p301
28
+
29
+ BUNDLED WITH
30
+ 1.17.3
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Jin-Dong Kim
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # SequenceAlignment
2
+
3
+ It allows you to obtain an optimal alignment of two character sequences, e.g., text.
4
+
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'sequence_alignment'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install sequence_alignment
19
+
20
+
21
+ ## Contributing
22
+
23
+ 1. Fork it
24
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
25
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
26
+ 4. Push to the branch (`git push origin my-new-feature`)
27
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require 'text_alignment/text_alignment'
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
module TextAlignment; end unless defined? TextAlignment

# Approximates the location of str1 in str2.
module TextAlignment
  # Length (in characters) of the n-grams used as location signatures.
  SIGNATURE_NGRAM = 5
  # When str2 is shorter than this, the whole of str2 is returned as the fit.
  MIN_LENGTH_FOR_APPROXIMATION = 50
  # Fraction of the estimated offset that is added as slack on each side of the fit.
  BUFFER_RATE = 0.2
end

class << TextAlignment

  # Finds an approximate region of str2 that contains str1.
  #
  # The region is estimated from "signature" n-grams: n-grams of
  # SIGNATURE_NGRAM characters that occur in both strings and exactly once
  # in str2. The first such n-gram (in str1 order) anchors the beginning of
  # the fit, and the last one (in str2 order) anchors the end. Each side is
  # widened by BUFFER_RATE of the respective offset and clipped to str2.
  #
  # str1 - the String to locate.
  # str2 - the String to search in.
  #
  # Returns two values, fit_begin and fit_end:
  # * 0 and str2.length when str2 is shorter than MIN_LENGTH_FOR_APPROXIMATION,
  # * nil and nil when no shared n-gram or no signature n-gram is found,
  #   or when the estimated region is empty,
  # * otherwise the estimated begin and end positions in str2.
  #
  # Raises ArgumentError if either string is nil.
  def approximate_fit(str1, str2)
    raise ArgumentError, 'nil string' if str1.nil? || str2.nil?
    return 0, str2.length if str2.length < TextAlignment::MIN_LENGTH_FOR_APPROXIMATION

    n = TextAlignment::SIGNATURE_NGRAM
    # A string shorter than n yields an empty range and therefore no n-grams.
    ngram1 = (0..str1.length - n).map { |i| str1[i, n] }
    ngram2 = (0..str2.length - n).map { |i| str2[i, n] }
    ngram_shared = ngram1 & ngram2

    # If there is no shared n-gram found, it may mean there is no serious overlap between the two strings
    return nil, nil if ngram_shared.empty?

    # Count every n-gram of str2 once, instead of rescanning ngram2 for each candidate.
    ngram2_count = Hash.new(0)
    ngram2.each { |g| ngram2_count[g] += 1 }

    # approximate the beginning of the fit
    signature_ngram = ngram_shared.find { |g| ngram2_count[g] == 1 }
    return nil, nil if signature_ngram.nil?
    offset = str1.index(signature_ngram)
    fit_begin = str2.index(signature_ngram) - offset - (offset * TextAlignment::BUFFER_RATE).to_i
    fit_begin = 0 if fit_begin < 0

    # approximate the end of the fit, walking the shared n-grams backwards in str2 order
    signature_ngram = (ngram2 & ngram1).reverse.find { |g| ngram2_count[g] == 1 }
    return nil, nil if signature_ngram.nil?
    offset = str1.length - str1.rindex(signature_ngram)
    fit_end = str2.rindex(signature_ngram) + offset + (offset * TextAlignment::BUFFER_RATE).to_i
    fit_end = str2.length if fit_end > str2.length

    return nil, nil if fit_begin >= fit_end
    return fit_begin, fit_end
  end
end
49
+
50
# Command-line entry: takes two JSON files, each with a "text" member, and
# prints the approximate location of the first text within the second.
if __FILE__ == $0
  require 'json'

  if ARGV.length == 2
    str1 = JSON.parse(File.read(ARGV[0]).strip)["text"]
    str2 = JSON.parse(File.read(ARGV[1]).strip)["text"]

    fit_begin, fit_end = TextAlignment::approximate_fit(str1, str2)
    p [fit_begin, fit_end]

    # approximate_fit yields nil bounds when no fit is found; slicing with
    # them would raise (or, on newer Rubies, print the whole text).
    puts str2[fit_begin...fit_end] unless fit_begin.nil?
  end
end
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ require 'text_alignment/approximate_fit'
3
+ require 'text_alignment/lcs_comparison'
4
+
5
module TextAlignment; end unless defined? TextAlignment

# to work on the hash representation of denotations
# to assume that there is no bag representation to this method

module TextAlignment
  # Threshold used both for the similarity score and for the tolerated
  # length differences when matching a source against the target.
  TextAlignment::SIMILARITY_THRESHOLD = 0.8
end

class << TextAlignment

  # Finds, among the sources, the right divisions for the target text to fit in.
  #
  # target   - the target text (String). It is normalized in place with the
  #            single-character mappings.
  # sources  - a non-empty Array of Hashes, read through source[:text] and
  #            source[:divid]. The texts are normalized in place, and the
  #            array itself is sorted by text size and consumed in place.
  # mappings - an Array of [from, to] String pairs. Pairs whose both members
  #            are a single character are applied to the texts and removed
  #            from this array in place; the other pairs are left in it.
  #
  # Returns an Array of [divid, [begin, end]] entries. When no source matches
  # what remains of the target, the last entry is
  # [-1, [remaining_target, remaining_divids]].
  #
  # Raises ArgumentError on a nil target, nil or empty sources, or nil mappings.
  def find_divisions(target, sources, mappings = [])
    raise ArgumentError, "nil target" if target.nil?
    raise ArgumentError, "nil or empty sources" if sources.nil? || sources.empty?
    raise ArgumentError, "nil mappings" if mappings.nil?

    single_character = lambda { |m| m[0].length == 1 && m[1].length == 1 }
    character_map = mappings.select(&single_character).to_h
    mappings.delete_if(&single_character)

    # Replace through a literal-character pattern and a lookup table, so that
    # characters such as '-', '^' and '\' in the mappings are taken literally.
    unless character_map.empty?
      pattern = Regexp.union(character_map.keys)
      target.gsub!(pattern, character_map)
      sources.each { |source| source[:text].gsub!(pattern, character_map) }
    end

    sources.sort! { |s1, s2| s1[:text].size <=> s2[:text].size }

    TextAlignment._find_divisions(target, sources)
  end

  # Recursive worker of find_divisions.
  #
  # It takes the first source that is similar enough to (a part of) the
  # target, removes it from sources and its region from the target, and
  # recurses on the remainder. Positions found on the shortened target are
  # shifted back to positions on the given target before they are returned.
  def _find_divisions(target, sources)
    mode, m, c, offset_begin = nil, nil, nil, nil

    sources.each_with_index do |source, i|
      # str1 is always the shorter text, to be located within str2
      if target.size < source[:text].size
        mode = :t_in_s
        str1 = target
        str2 = source[:text]
      else
        mode = :s_in_t
        str1 = source[:text]
        str2 = target
      end

      len1 = str1.length
      len2 = str2.length
      tolerance = len1 * (1 - TextAlignment::SIMILARITY_THRESHOLD)

      # narrow down str2 only when it is considerably longer than str1
      offset_begin, offset_end = 0, -1
      offset_begin, offset_end = approximate_fit(str1, str2) if (len2 - len1) > tolerance

      # a nil offset means that no approximate fit was found for this source
      next if offset_begin.nil?

      c = TextAlignment::LCSComparison.new(str1, str2[offset_begin .. offset_end])
      matched_length = c.str1_match_final - c.str1_match_initial + 1
      if (c.similarity > TextAlignment::SIMILARITY_THRESHOLD) && ((len1 - matched_length) < tolerance)
        m = i
        break
      end
    end

    # return remaining target and sources if no source matched
    return [[-1, [target, sources.collect { |s| s[:divid] }]]] if m.nil?

    index = if mode == :t_in_s
      [sources[m][:divid], [0, target.size]]
    else # :s_in_t
      [sources[m][:divid], [c.str2_match_initial + offset_begin, c.str2_match_final + offset_begin + 1]]
    end

    next_target = target[0 ... index[1][0]] + target[index[1][1] .. -1]
    sources.delete_at(m)

    return [index] if next_target.strip.empty? || sources.empty?

    more_index = _find_divisions(next_target, sources)

    # shift the positions found after the removed region back by its length
    gap = index[1][1] - index[1][0]
    more_index.each do |entry|
      next unless entry[0] > -1
      entry[1][0] += gap if entry[1][0] >= index[1][0]
      entry[1][1] += gap if entry[1][1] > index[1][0]
    end

    [index] + more_index
  end
end
92
+
93
# Command-line entry: the first argument is a JSON file holding the target
# (its :text member is used), the second a JSON file holding the sources.
# Prints each division found together with the piece of target text it covers.
if __FILE__ == $0
  require 'json'

  if ARGV.length == 2
    target_path, sources_path = ARGV

    target = JSON.parse(File.read(target_path), :symbolize_names => true)
    target_text = target[:text].strip

    sources = JSON.parse(File.read(sources_path), :symbolize_names => true)
    div_index = TextAlignment::find_divisions(target_text, sources)

    # Alternative for plain-text input:
    # str1 = File.read(ARGV[0]).strip
    # str2 = File.read(ARGV[1]).strip
    # div_index = TextAlignment::find_divisions(str1, [str2])

    puts "target length: #{target_text.length}"
    div_index.each do |division|
      divid, region = division
      if divid >= 0
        region_begin, region_end = region
        puts "[Div: #{divid}] (#{region_begin}, #{region_end})"
        puts target_text[region_begin ... region_end]
        puts "=========="
      else
        # the entry that reports the unmatched remainder
        p division
      end
    end
  end
end
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/env ruby
2
+ require 'ruby-dictionary'
3
+
4
module TextAlignment; end unless defined? TextAlignment

# An instance of this class holds the results of generalized LCS (GLCS) computation for the two strings str1 and str2.
# An optional list of mappings is used for generalized prefix comparison.
class TextAlignment::GLCSAlignment
  # The mapping function from str1 to str2
  attr_reader :position_map_begin, :position_map_end

  # The initial and final positions of matching on str1 and str2.
  # They are assigned only in some cases of the table trace, and are nil otherwise.
  attr_reader :str1_match_begin, :str1_match_end, :str2_match_begin, :str2_match_end

  # The length of GLCS
  attr_reader :length

  # the elements that are common in the two strings, str1 and str2
  attr_reader :common_elements

  # the elements that are mapped to each other in the two strings, str1 and str2
  attr_reader :mapped_elements

  # the strings of non-mapped characters, [from str1, from str2]
  attr_reader :diff_strings

  # The length of the front / rear part of str1 that cannot fit in str2,
  # approximated during the table trace. Read by #similarity when cut is true.
  attr_reader :front_overflow, :rear_overflow

  # It initializes the GLCS table for the given two strings, str1 and str2.
  # When the array, mappings, is given, general prefix comparison is performed based on the mappings.
  # Each mapping is a pair of strings that are regarded as equivalent.
  # ArgumentError is raised when nil is passed to either str1, str2 or mappings.
  def initialize(str1, str2, mappings = [])
    raise ArgumentError, "nil string" if str1.nil? || str2.nil?
    raise ArgumentError, "nil dictionary" if mappings.nil?

    # index the mappings in a hash, in both directions.
    @dic = (mappings + mappings.map { |pair| pair.reverse }).to_h

    # prefix dictionary (Dictionary comes from the ruby-dictionary gem)
    @pdic = Dictionary.new(mappings.flatten)

    @len1 = str1.length
    @len2 = str2.length

    # add a final marker to the end of the strings
    @str1 = str1 + '_'
    @str2 = str2 + '_'

    # compute the GLCS table
    @glcs = _compute_glcs_table
    @length = @glcs[0][0]

    _trace_glcs_table
  end

  # Prints the GLCS table
  def show_glcs
    puts "\t\t" + @str2.split(//).join("\t")
    @glcs.each_with_index do |row, i|
      h = @str1[i] || ''
      puts i.to_s + "\t" + h + "\t" + row.join("\t")
    end
  end

  # Returns the character-by-character difference as a pair of strings,
  # padded with spaces so that matching elements stay in the same columns.
  def cdiff
    cdiff1, cdiff2 = '', ''
    p1, p2 = 0, 0

    # The body runs at least once, also when both strings are empty.
    loop do
      s1, s2 = _prefix_eq(@str1[p1...@len1], @str2[p2...@len2])
      if !s1.nil?
        l1, l2 = s1.length, s2.length

        cdiff1 += s1; cdiff2 += s2
        # pad the shorter element of the matched pair
        if l1 > l2 then cdiff2 += ' ' * (l1 - l2) else cdiff1 += ' ' * (l2 - l1) end
        p1 += l1; p2 += l2
      elsif p2 < @len2 && (p1 == @len1 || @glcs[p1][p2 + 1] > @glcs[p1 + 1][p2])
        # a character only in str2
        cdiff1 += ' '
        cdiff2 += @str2[p2]
        p2 += 1
      elsif p1 < @len1 && (p2 == @len2 || @glcs[p1][p2 + 1] <= @glcs[p1 + 1][p2])
        # a character only in str1
        cdiff1 += @str1[p1]
        cdiff2 += ' '
        p1 += 1
      end

      break if p1 == @len1 && p2 == @len2
    end

    [cdiff1, cdiff2]
  end

  # Computes the similarity of the two strings as
  # 2 * GLCS length / (length of str1 side + length of str2 side),
  # where each side is the GLCS length plus the length of its diff string.
  # When cut is true, the str1 side is reduced by the absolute values of the
  # front and rear overflows.
  def similarity(cut = false)
    c = @length

    l1 = c + @diff_strings[0].length
    l2 = c + @diff_strings[1].length

    if cut
      l1 -= front_overflow if front_overflow > 0
      l1 -= rear_overflow  if rear_overflow > 0
      l1 += front_overflow if front_overflow < 0
      l1 += rear_overflow  if rear_overflow < 0
    end

    2 * c / (l1 + l2).to_f
  end

  # Maps a span, a hash with :begin and :end positions on str1,
  # to a span on str2 using the position maps.
  def transform_a_span(span)
    {:begin => @position_map_begin[span[:begin]], :end => @position_map_end[span[:end]]}
  end

  # Maps an array of spans from str1 to str2.
  def transform_spans(spans)
    spans.map { |span| transform_a_span(span) }
  end


  private

  # Computes the GLCS table for the two strings, @str1 and @str2.
  # Unlike normal LCS algorithms, the computation is performed from the end to the beginning of the strings.
  def _compute_glcs_table
    glcs = Array.new(@len1 + 1) { Array.new(@len2 + 1) }

    # initialize the final row and the final column
    (0..@len1).each { |p| glcs[p][@len2] = 0 }
    (0..@len2).each { |p| glcs[@len1][p] = 0 }

    # compute the GLCS table, from the last positions backwards
    (@len1 - 1).downto(0) do |p1|
      (@len2 - 1).downto(0) do |p2|
        s1, s2 = _prefix_eq(@str1[p1...@len1], @str2[p2...@len2])
        glcs[p1][p2] = if s1.nil?
          [glcs[p1][p2 + 1], glcs[p1 + 1][p2]].max
        else
          # a matched pair counts as one element, whatever its length
          glcs[p1 + s1.length][p2 + s2.length] + 1
        end
      end
    end

    glcs
  end

  # Backtrace the GLCS table, computing the mapping function from str1 to str2
  # As its side effect, it updates the following instance variables
  # * front_overflow: the length of the front part of str1 that cannot fit in str2.
  # * rear_overflow: the length of the rear part of str1 that cannot fit in str2.
  # * common_elements: an array which stores the common elements in the two strings.
  # * mapped_elements: an array which stores the mapped elements in the two strings.
  # * diff_strings: the characters of str1 and of str2 that are not part of a common element.
  # * position_map_begin, position_map_end: the position maps from str1 to str2.
  def _trace_glcs_table
    @front_overflow, @rear_overflow = 0, 0
    @common_elements, @mapped_elements = [], []
    diff_string1, diff_string2 = '', ''

    @position_map_begin, @position_map_end = {}, {}
    # positions of the str2-only and str1-only characters seen since the last common element
    addition, deletion = [], []
    p1, p2 = 0, 0

    # The ranges below include the final markers, so the trace ends with the
    # markers matching each other at p1 == @len1 and p2 == @len2.
    while p1 <= @len1 && p2 <= @len2
      s1, s2 = _prefix_eq(@str1[p1..@len1], @str2[p2..@len2])
      if !s1.nil?
        l1, l2 = s1.length, s2.length

        @position_map_begin[p1], @position_map_end[p1] = p2, p2
        (p1 + 1 ... p1 + l1).each { |i| @position_map_begin[i], @position_map_end[i] = nil, nil }

        @common_elements << [s1, s2]

        if !addition.empty? && deletion.empty?
          # If an addition is found in the front or the rear, it is a case of underflow
          @str2_match_begin = addition.length if p1 == 0
          @str2_match_end = l2 - addition.length if p1 == @len1

          if p1 == 0
            # leave as it is
          elsif p1 == @len1
            # retract from the end
            @position_map_begin[p1] = p2 - addition.length
            @position_map_end[p1] = @position_map_begin[p1]
          else
            # correct the position for end
            @position_map_end[p1] = p2 - addition.length
          end
        elsif addition.empty? && !deletion.empty?
          # If a deletion is found in the front or the rear, it is a case of overflow
          @str1_match_begin = deletion.length if p1 == deletion.length
          @str1_match_end = l1 - deletion.length if p1 == @len1

          deletion.each { |p| @position_map_begin[p], @position_map_end[p] = p2, p2 }
        elsif !addition.empty? && !deletion.empty?
          # If an addition and a deletion are both found in the front or the rear,
          # the overflow/underflow is approximated to the difference.
          al, dl = addition.length, deletion.length
          @front_overflow = dl - al if p1 == dl
          @rear_overflow = dl - al if p1 == @len1

          @mapped_elements << [@str1[deletion[0], dl], @str2[addition[0], al]]

          @position_map_begin[deletion[0]], @position_map_end[deletion[0]] = addition[0], addition[0]
          deletion[1..-1].each { |p| @position_map_begin[p], @position_map_end[p] = nil, nil }
        end

        addition.clear; deletion.clear
        p1 += l1; p2 += l2

      elsif p2 < @len2 && (p1 == @len1 || @glcs[p1][p2 + 1] > @glcs[p1 + 1][p2])
        diff_string2 += @str2[p2]

        addition << p2
        p2 += 1
      elsif p1 < @len1 && (p2 == @len2 || @glcs[p1][p2 + 1] <= @glcs[p1 + 1][p2])
        diff_string1 += @str1[p1]

        deletion << p1
        p1 += 1
      end
    end

    # drop the pair of final markers
    @common_elements.pop
    @diff_strings = [diff_string1, diff_string2]
  end

  # General prefix comparison is performed based on the dictionary.
  # The pair of matched prefixes is returned when found:
  # first a dictionary entry at the head of str1 whose counterpart is at the
  # head of str2, otherwise the first characters when they are equal.
  # Otherwise, the pair of nil values is returned.
  def _prefix_eq(str1, str2)
    return nil, nil if str1.empty? || str2.empty?

    @pdic.prefixes(str1).each do |prefix1|
      prefix2 = @dic[prefix1]
      return prefix1, prefix2 if str2.start_with?(prefix2)
    end

    return str1[0], str2[0] if str1[0] == str2[0]
    return nil, nil
  end

end
238
+
239
# Demonstration entry: aligns a sample pair of strings with a sample
# dictionary and prints the resulting maps, elements and scores.
if __FILE__ == $0

  # Whitespace entries are written as escapes, so that the distinct space
  # characters stay distinguishable in the source.
  dictionary = [
                ["×", "x"],        #U+00D7 (multiplication sign)
                ["•", "*"],        #U+2022 (bullet)
                ["Δ", "delta"],    #U+0394 (greek capital letter delta)
                ["Φ", "phi"],      #U+03A6 (greek capital letter phi)
                ["α", "alpha"],    #U+03B1 (greek small letter alpha)
                ["β", "beta"],     #U+03B2 (greek small letter beta)
                ["γ", "gamma"],    #U+03B3 (greek small letter gamma)
                ["δ", "delta"],    #U+03B4 (greek small letter delta)
                ["ε", "epsilon"],  #U+03B5 (greek small letter epsilon)
                ["κ", "kappa"],    #U+03BA (greek small letter kappa)
                ["λ", "lambda"],   #U+03BB (greek small letter lambda)
                ["μ", "mu"],       #U+03BC (greek small letter mu)
                ["χ", "chi"],      #U+03C7 (greek small letter chi)
                ["ϕ", "phi"],      #U+03D5 (greek phi symbol)
                ["\u2009", " "],   #U+2009 (thin space)
                ["\u200A", " "],   #U+200A (hair space)
                ["\u00A0", " "],   #U+00A0 (no-break space)
                ["\u3000", " "],   #U+3000 (ideographic space)
                ["−", "-"],        #U+2212 (minus sign)
                ["–", "-"],        #U+2013 (en dash)
                ["′", "'"],        #U+2032 (prime)
                ["‘", "'"],        #U+2018 (left single quotation mark)
                ["’", "'"],        #U+2019 (right single quotation mark)
                ["“", '"'],        #U+201C (left double quotation mark)
                ["”", '"']         #U+201D (right double quotation mark)
               ]

  # Other sample pairs that can be tried:

  # str1 = "-betakappaxyz-"
  # str2 = "-ijkβκ-"

  # str1 = "-βκ-β-z-xy"
  # str2 = "abc-betakappa-beta-z"

  # str1 = "-βκ-z-xy"
  # str2 = "abc-betakappa-z"

  # str1 = "abc-βκ-β-z"
  # str2 = "-betakappa-beta-z-xyz"

  # str1 = "-β-"
  # str2 = "-beta-"

  # str1 = "-κ-"
  # str2 = "-kappa-"

  # str1 = File.read(ARGV[0]).strip
  # str2 = File.read(ARGV[1]).strip

  str1 = "beta"
  str2 = "β***"

  # puts "str1: #{str1}"
  # puts "str2: #{str2}"
  sa = TextAlignment::GLCSAlignment.new(str1, str2, dictionary)
  sa.position_map_begin.each { |h| p h }
  puts '-----'
  sa.position_map_end.each { |h| p h }
  puts '-----'
  puts "common_elements: #{sa.common_elements}"
  puts '-----'
  puts "mapped_elements: #{sa.mapped_elements}"
  puts '-----'
  # puts "diff_string1: #{sa.diff_strings[0]}"
  # puts "diff_string2: #{sa.diff_strings[1]}"
  puts "front_overflow: #{sa.front_overflow}"
  puts "rear_overflow : #{sa.rear_overflow}"
  puts '-----'
  puts "similarity     : #{sa.similarity}"
  puts "similarity(cut): #{sa.similarity(true)}"
end