text_alignment 0.5.1 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 041aa1d92ea6bb54ca9fd005d0a8684b8c012b0cb55e5ea00d54be357eae646c
4
- data.tar.gz: f821ae66f4c64eb7043ec85515694c207510be860568cc86fc47b89c8e504f87
3
+ metadata.gz: 6bed1eba72da626227ab727ce22129d226539bcfae5ca22006ac26258b184d8c
4
+ data.tar.gz: d2c121ea072186fd25fd61fb90c5ffacb886c1d109b82c044a1666220b8f7d8b
5
5
  SHA512:
6
- metadata.gz: 9bf264a8789a2630e6a820b0a0833854f6b03de802d78ea60b5849e5ee6ceb0119494221fadb2220ad27edb92f44711530ef840c05c044396402e2227f71c004
7
- data.tar.gz: c4a95752b186092d2acc48dbdbeedde6ca952da02dcf206c95304f11f6d0c433dd2ad25f50729dceb5610fee438f9ca4df7171db66af7f2904a247ba50105149
6
+ metadata.gz: 6e526995325e79fdde8ecd729c04e2e6a21e13f0166acc39b341133055275a1bbd5a3318f78dd5af4a72237c140fa8eb06270441a16e2426e58a57183b91ca6a
7
+ data.tar.gz: ec423d59036b1ee5595141428fe320f0e9ca16b8b2660d46a0f59f376c3845ad70196d006c2f83390ac12f98b35ff14a1098fcd24cda0ee1c6534f36915def81
@@ -137,26 +137,26 @@ else
137
137
 
138
138
  source_text = source_annotations[:text]
139
139
 
140
- # puts "[block alignment]"
140
+ puts "[block alignment]"
141
141
  puts alignment.alignment_show
142
- # puts "====="
142
+ puts "====="
143
143
  # exit
144
144
 
145
145
  # verification of source denotations
146
- # puts "[Invalid source denotations]"
147
- # source_annotations[:denotations] do |d|
148
- # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
- # end
150
- # puts "====="
151
- # puts
146
+ puts "[Invalid source denotations]"
147
+ source_annotations[:denotations] do |d|
148
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
+ end
150
+ puts "====="
151
+ puts
152
152
 
153
153
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
154
- # puts "[Invalid transformation]"
155
- # denotations.each do |d|
156
- # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
- # end
158
- # puts "====="
159
- # puts
154
+ puts "[Invalid transformation]"
155
+ denotations.each do |d|
156
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
+ end
158
+ puts "====="
159
+ puts
160
160
 
161
161
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
162
162
 
@@ -33,7 +33,9 @@ class TextAlignment::LCSComparison
33
33
  @str2_match_initial = sdiff[match_initial].new_position
34
34
  @str1_match_final = sdiff[match_final].old_position
35
35
  @str2_match_final = sdiff[match_final].new_position
36
- @similarity = 2 * lcs / ((@str1_match_final - @str1_match_initial + 1) + (@str2_match_final - @str2_match_initial + 1)).to_f
36
+ mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
37
+ @similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
38
+ # @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
37
39
  else
38
40
  @str1_match_initial = 0
39
41
  @str2_match_initial = 0
@@ -17,9 +17,10 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(str1, str2, mappings = [])
21
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
22
- mappings ||= []
20
+ def initialize(_str1, _str2)
21
+ raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
+
23
+ str1, str2, mappings = string_preprocessing(_str1, _str2)
23
24
 
24
25
  _compute_mixed_alignment(str1, str2, mappings)
25
26
  end
@@ -62,7 +63,7 @@ class TextAlignment::MixedAlignment
62
63
  end
63
64
 
64
65
  cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff)
65
- @similarity = cmp.similarity
66
+ @similarity = compute_similarity(str1, str2, @sdiff)
66
67
  @str1_match_initial = cmp.str1_match_initial
67
68
  @str1_match_final = cmp.str1_match_final
68
69
  @str2_match_initial = cmp.str2_match_initial
@@ -137,4 +138,73 @@ class TextAlignment::MixedAlignment
137
138
  @position_map_begin = posmap_begin.sort.to_h
138
139
  @position_map_end = posmap_end.sort.to_h
139
140
  end
141
+
142
+ private
143
+
144
+ def string_preprocessing(_str1, _str2)
145
+ str1 = _str1.dup
146
+ str2 = _str2.dup
147
+ mappings = TextAlignment::MAPPINGS.dup
148
+
149
+ ## single character mappings
150
+ character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
151
+ characters_from = character_mappings.collect{|m| m[0]}.join
152
+ characters_to = character_mappings.collect{|m| m[1]}.join
153
+ characters_to.gsub!(/-/, '\-')
154
+
155
+ str1.tr!(characters_from, characters_to)
156
+ str2.tr!(characters_from, characters_to)
157
+
158
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
159
+
160
+ ## long to one character mappings
161
+ pletters = TextAlignment::PADDING_LETTERS
162
+
163
+ # find the padding letter for str1
164
+ @padding_letter1 = begin
165
+ i = pletters.index{|l| str2.index(l).nil?}
166
+ raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
167
+ TextAlignment::PADDING_LETTERS[i]
168
+ end
169
+
170
+ # find the padding letter for str2
171
+ @padding_letter2 = begin
172
+ i = pletters.index{|l| l != @padding_letter1 && str1.index(l).nil?}
173
+ raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
174
+ TextAlignment::PADDING_LETTERS[i]
175
+ end
176
+
177
+ # ASCII foldings
178
+ ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
179
+ ascii_foldings.each do |f|
180
+ from = f[1]
181
+
182
+ if str2.index(f[0])
183
+ to = f[0] + (@padding_letter1 * (f[1].length - 1))
184
+ str1.gsub!(from, to)
185
+ end
186
+
187
+ if str1.index(f[0])
188
+ to = f[0] + (@padding_letter2 * (f[1].length - 1))
189
+ str2.gsub!(from, to)
190
+ end
191
+ end
192
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
193
+
194
+ [str1, str2, mappings]
195
+ end
196
+
197
+ def compute_similarity(_s1, _s2, sdiff)
198
+ return 0 if sdiff.nil?
199
+
200
+ # compute the lcs only with non-whitespace letters
201
+ lcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
202
+ return 0 if lcs == 0
203
+
204
+ s1 = _s1.tr(@padding_letter1, ' ')
205
+ s2 = _s2.tr(@padding_letter2, ' ')
206
+
207
+ similarity = lcs / [s1.scan(/\S/).count, s2.scan(/\S/).count].min.to_f
208
+ end
209
+
140
210
  end
@@ -8,30 +8,27 @@ module TextAlignment; end unless defined? TextAlignment
8
8
  TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
9
9
 
10
10
  class TextAlignment::TextAlignment
11
- attr_reader :block_alignments
11
+ attr_reader :block_alignment
12
12
  attr_reader :similarity
13
13
  attr_reader :lost_annotations
14
14
 
15
- def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
- raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
15
+ def initialize(str1, str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
+ raise ArgumentError, "nil string" if str1.nil? || str2.nil?
17
17
 
18
- @ostr1 = _str1
19
- @ostr2 = _str2
20
-
21
- str1, str2, mappings = string_preprocessing(_str1, _str2)
18
+ @block_alignment = {source_text:str1, target_text:str2}
22
19
 
23
20
  # try exact match
24
21
  block_begin = str2.index(str1)
25
22
  unless block_begin.nil?
26
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
27
- return @block_alignments
23
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
24
+ return @block_alignment
28
25
  end
29
26
 
30
27
  # try exact match
31
28
  block_begin = str2.downcase.index(str1.downcase)
32
29
  unless block_begin.nil?
33
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
34
- return @block_alignments
30
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
31
+ return @block_alignment
35
32
  end
36
33
 
37
34
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
@@ -64,7 +61,7 @@ class TextAlignment::TextAlignment
64
61
  # puts
65
62
 
66
63
  ## To find block alignments
67
- @block_alignments = []
64
+ @block_alignment[:blocks] = []
68
65
  return if mblocks.empty?
69
66
 
70
67
  # Initial step
@@ -73,35 +70,35 @@ class TextAlignment::TextAlignment
73
70
  e2 = mblocks[0][:target][:begin]
74
71
 
75
72
  if mblocks[0][:target][:begin] == 0
76
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
73
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
77
74
  else
78
75
  _str1 = str1[0 ... e1]
79
76
  _str2 = str2[0 ... e2]
80
77
 
81
78
  unless _str1.strip.empty?
82
79
  if _str2.strip.empty?
83
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
80
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
84
81
  else
85
82
  len_min = [_str1.length, _str2.length].min
86
83
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
87
84
  b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
88
85
  b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
89
86
 
90
- @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
87
+ @block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
91
88
 
92
89
  _str1 = str1[b1 ... e1]
93
90
  _str2 = str2[b2 ... e2]
94
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
95
- if alignment.similarity < 0.6
96
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
91
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
92
+ if alignment.similarity < 0.5
93
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty, similarity: alignment.similarity}
97
94
  else
98
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
95
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment, similarity: alignment.similarity}
99
96
  end
100
97
  end
101
98
  end
102
99
  end
103
100
  end
104
- @block_alignments << mblocks[0].merge(alignment: :block)
101
+ @block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
105
102
 
106
103
  (1 ... mblocks.length).each do |i|
107
104
  b1 = mblocks[i - 1][:source][:end]
@@ -112,17 +109,17 @@ class TextAlignment::TextAlignment
112
109
  _str2 = str2[b2 ... e2]
113
110
  unless _str1.strip.empty?
114
111
  if _str2.strip.empty?
115
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
112
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
116
113
  else
117
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
118
- if alignment.similarity < 0.6
119
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
114
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
115
+ if alignment.similarity < 0.5
116
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
120
117
  else
121
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
118
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
122
119
  end
123
120
  end
124
121
  end
125
- @block_alignments << mblocks[i].merge(alignment: :block)
122
+ @block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
126
123
  end
127
124
 
128
125
  # Final step
@@ -134,7 +131,7 @@ class TextAlignment::TextAlignment
134
131
 
135
132
  unless _str1.strip.empty?
136
133
  if _str2.strip.empty?
137
- @block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
134
+ @block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
138
135
  else
139
136
  len_min = [_str1.length, _str2.length].min
140
137
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
@@ -143,58 +140,58 @@ class TextAlignment::TextAlignment
143
140
  _str1 = str1[b1 ... e1]
144
141
  _str2 = str2[b2 ... e2]
145
142
 
146
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
147
- if alignment.similarity < 0.6
148
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
143
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
144
+ if alignment.similarity < 0.5
145
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
149
146
  else
150
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
147
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
151
148
  end
152
149
 
153
- @block_alignments << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
150
+ @block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
154
151
  end
155
152
  end
156
153
  end
157
154
 
158
- @block_alignments.each do |a|
155
+ @block_alignment[:blocks].each do |a|
159
156
  a[:delta] = a[:target][:begin] - a[:source][:begin]
160
157
  end
161
158
  end
162
159
 
163
160
  def transform_begin_position(begin_position)
164
- i = @block_alignments.index{|b| b[:source][:end] > begin_position}
165
- block_alignment = @block_alignments[i]
166
-
167
- b = if block_alignment[:alignment] == :block
168
- begin_position + block_alignment[:delta]
169
- elsif block_alignment[:alignment] == :empty
170
- if begin_position == block_alignment[:source][:begin]
171
- block_alignment[:target][:begin]
161
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
162
+ block = @block_alignment[:blocks][i]
163
+
164
+ b = if block[:alignment] == :block
165
+ begin_position + block[:delta]
166
+ elsif block[:alignment] == :empty
167
+ if begin_position == block[:source][:begin]
168
+ block[:target][:begin]
172
169
  else
173
170
  # raise "lost annotation"
174
171
  nil
175
172
  end
176
173
  else
177
- r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
178
- r.nil? ? nil : r + block_alignment[:target][:begin]
174
+ r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
175
+ r.nil? ? nil : r + block[:target][:begin]
179
176
  end
180
177
  end
181
178
 
182
179
  def transform_end_position(end_position)
183
- i = @block_alignments.index{|b| b[:source][:end] >= end_position}
184
- block_alignment = @block_alignments[i]
185
-
186
- e = if block_alignment[:alignment] == :block
187
- end_position + block_alignment[:delta]
188
- elsif block_alignment[:alignment] == :empty
189
- if end_position == block_alignment[:source][:end]
190
- block_alignment[:target][:end]
180
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
181
+ block = @block_alignment[:blocks][i]
182
+
183
+ e = if block[:alignment] == :block
184
+ end_position + block[:delta]
185
+ elsif block[:alignment] == :empty
186
+ if end_position == block[:source][:end]
187
+ block[:target][:end]
191
188
  else
192
189
  # raise "lost annotation"
193
190
  nil
194
191
  end
195
192
  else
196
- r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
197
- r.nil? ? nil : r + block_alignment[:target][:begin]
193
+ r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
194
+ r.nil? ? nil : r + block[:target][:begin]
198
195
  end
199
196
  end
200
197
 
@@ -240,83 +237,22 @@ class TextAlignment::TextAlignment
240
237
  r
241
238
  end
242
239
 
243
- def alignment_table
244
- table = <<-TABLE
245
- <table class='text_alignment_table'>
246
- <thead>
247
- <tr>
248
- <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
- <th class='text_alignment_rigt'>Text 2</th>
250
- </tr>
251
- </thead>
252
- <tbody>
253
- TABLE
254
-
255
- @block_alignments.each do |a|
256
- table += alignment_table_th(a)
257
- table += "<tr>\n" + case a[:alignment]
258
- when :block
259
- "<td colspan='2' class='text_alignment_common'>" +
260
- @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
- "</td>\n"
262
- when :empty
263
- "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
- "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
- else
266
- base = a[:source][:begin]
267
- astr1 = a[:alignment].sdiff.map do |c|
268
- case c.action
269
- when '='
270
- @ostr1[c.old_position + base]
271
- when '+'
272
- '_'
273
- when '-'
274
- @ostr1[c.old_position + base]
275
- when '!'
276
- @ostr1[c.old_position + base] + '_'
277
- end
278
- end.join('')
279
-
280
- base = a[:target][:begin]
281
- astr2 = a[:alignment].sdiff.map do |c|
282
- case c.action
283
- when '='
284
- @ostr2[c.new_position + base]
285
- when '+'
286
- @ostr2[c.new_position + base]
287
- when '-'
288
- '_'
289
- when '!'
290
- '_' + @ostr2[c.new_position + base]
291
- end
292
- end.join('')
293
-
294
- "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
- "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
- end + "</tr>\n"
297
- end
298
- table += '</tbody></table>'
299
- end
300
-
301
- def alignment_table_th(a)
302
- "<tr>" +
303
- "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
- "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
- "</tr>"
306
- end
307
-
308
240
  def alignment_show
241
+ stext = @block_alignment[:source_text]
242
+ ttext = @block_alignment[:target_text]
243
+
309
244
  show = ''
310
- @block_alignments.each do |a|
245
+ @block_alignment[:blocks].each do |a|
311
246
  show += case a[:alignment]
312
247
  when :block
313
- "===== common =====\n" +
314
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
248
+ "===== common ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
249
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
250
  when :empty
316
- "<<<<< string 1\n" +
317
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
318
- ">>>>> string 2\n" +
319
- @ostr2[a[:target][:begin] ... a[:target][:end]] + "\n\n"
251
+ "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
252
+ "<<<<< string 1 [#{a[:source][:begin]} - #{a[:source][:end]}]\n" +
253
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
254
+ ">>>>> string 2 [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
255
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
320
256
  else
321
257
  astr1 = ''
322
258
  astr2 = ''
@@ -325,13 +261,13 @@ class TextAlignment::TextAlignment
325
261
  astr1 = a[:alignment].sdiff.map do |c|
326
262
  case c.action
327
263
  when '='
328
- @ostr1[c.old_position + base]
264
+ stext[c.old_position + base]
329
265
  when '+'
330
266
  '_'
331
267
  when '-'
332
- @ostr1[c.old_position + base]
268
+ stext[c.old_position + base]
333
269
  when '!'
334
- @ostr1[c.old_position + base] + '_'
270
+ stext[c.old_position + base] + '_'
335
271
  end
336
272
  end.join('')
337
273
 
@@ -339,17 +275,17 @@ class TextAlignment::TextAlignment
339
275
  astr2 = a[:alignment].sdiff.map do |c|
340
276
  case c.action
341
277
  when '='
342
- @ostr2[c.new_position + base]
278
+ ttext[c.new_position + base]
343
279
  when '+'
344
- @ostr2[c.new_position + base]
280
+ ttext[c.new_position + base]
345
281
  when '-'
346
282
  '_'
347
283
  when '!'
348
- '_' + @ostr2[c.new_position + base]
284
+ '_' + ttext[c.new_position + base]
349
285
  end
350
286
  end.join('')
351
287
 
352
- "***** local mismatch\n" +
288
+ "***** local mismatch [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}] (similarity: #{a[:similarity]})\n" +
353
289
  "[#{astr1}]\n" +
354
290
  "[#{astr2}]\n\n"
355
291
  end
@@ -357,59 +293,4 @@ class TextAlignment::TextAlignment
357
293
  show
358
294
  end
359
295
 
360
- private
361
-
362
- def string_preprocessing(_str1, _str2)
363
- str1 = _str1.dup
364
- str2 = _str2.dup
365
- mappings = TextAlignment::MAPPINGS.dup
366
-
367
- ## single character mappings
368
- character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
369
- characters_from = character_mappings.collect{|m| m[0]}.join
370
- characters_to = character_mappings.collect{|m| m[1]}.join
371
- characters_to.gsub!(/-/, '\-')
372
-
373
- str1.tr!(characters_from, characters_to)
374
- str2.tr!(characters_from, characters_to)
375
-
376
- mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
377
-
378
- ## long to one character mappings
379
- pletters = TextAlignment::PADDING_LETTERS
380
-
381
- # find the padding letter for str1
382
- padding_letter1 = begin
383
- i = pletters.index{|l| str2.index(l).nil?}
384
- raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
385
- TextAlignment::PADDING_LETTERS[i]
386
- end
387
-
388
- # find the padding letter for str2
389
- padding_letter2 = begin
390
- i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
391
- raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
392
- TextAlignment::PADDING_LETTERS[i]
393
- end
394
-
395
- # ASCII foldings
396
- ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
397
- ascii_foldings.each do |f|
398
- from = f[1]
399
-
400
- if str2.index(f[0])
401
- to = f[0] + (padding_letter1 * (f[1].length - 1))
402
- str1.gsub!(from, to)
403
- end
404
-
405
- if str1.index(f[0])
406
- to = f[0] + (padding_letter2 * (f[1].length - 1))
407
- str2.gsub!(from, to)
408
- end
409
- end
410
- mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
411
-
412
- [str1, str2, mappings]
413
- end
414
-
415
296
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.5.1'
2
+ VERSION = '0.6.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-04 00:00:00.000000000 Z
11
+ date: 2020-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary