text_alignment 0.5.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 041aa1d92ea6bb54ca9fd005d0a8684b8c012b0cb55e5ea00d54be357eae646c
4
- data.tar.gz: f821ae66f4c64eb7043ec85515694c207510be860568cc86fc47b89c8e504f87
3
+ metadata.gz: 6bed1eba72da626227ab727ce22129d226539bcfae5ca22006ac26258b184d8c
4
+ data.tar.gz: d2c121ea072186fd25fd61fb90c5ffacb886c1d109b82c044a1666220b8f7d8b
5
5
  SHA512:
6
- metadata.gz: 9bf264a8789a2630e6a820b0a0833854f6b03de802d78ea60b5849e5ee6ceb0119494221fadb2220ad27edb92f44711530ef840c05c044396402e2227f71c004
7
- data.tar.gz: c4a95752b186092d2acc48dbdbeedde6ca952da02dcf206c95304f11f6d0c433dd2ad25f50729dceb5610fee438f9ca4df7171db66af7f2904a247ba50105149
6
+ metadata.gz: 6e526995325e79fdde8ecd729c04e2e6a21e13f0166acc39b341133055275a1bbd5a3318f78dd5af4a72237c140fa8eb06270441a16e2426e58a57183b91ca6a
7
+ data.tar.gz: ec423d59036b1ee5595141428fe320f0e9ca16b8b2660d46a0f59f376c3845ad70196d006c2f83390ac12f98b35ff14a1098fcd24cda0ee1c6534f36915def81
@@ -137,26 +137,26 @@ else
137
137
 
138
138
  source_text = source_annotations[:text]
139
139
 
140
- # puts "[block alignment]"
140
+ puts "[block alignment]"
141
141
  puts alignment.alignment_show
142
- # puts "====="
142
+ puts "====="
143
143
  # exit
144
144
 
145
145
  # verification of source denotations
146
- # puts "[Invalid source denotations]"
147
- # source_annotations[:denotations] do |d|
148
- # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
- # end
150
- # puts "====="
151
- # puts
146
+ puts "[Invalid source denotations]"
147
+ source_annotations[:denotations] do |d|
148
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
149
+ end
150
+ puts "====="
151
+ puts
152
152
 
153
153
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
154
- # puts "[Invalid transformation]"
155
- # denotations.each do |d|
156
- # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
- # end
158
- # puts "====="
159
- # puts
154
+ puts "[Invalid transformation]"
155
+ denotations.each do |d|
156
+ p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
157
+ end
158
+ puts "====="
159
+ puts
160
160
 
161
161
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
162
162
 
@@ -33,7 +33,9 @@ class TextAlignment::LCSComparison
33
33
  @str2_match_initial = sdiff[match_initial].new_position
34
34
  @str1_match_final = sdiff[match_final].old_position
35
35
  @str2_match_final = sdiff[match_final].new_position
36
- @similarity = 2 * lcs / ((@str1_match_final - @str1_match_initial + 1) + (@str2_match_final - @str2_match_initial + 1)).to_f
36
+ mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
37
+ @similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
38
+ # @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
37
39
  else
38
40
  @str1_match_initial = 0
39
41
  @str2_match_initial = 0
@@ -17,9 +17,10 @@ class TextAlignment::MixedAlignment
17
17
  attr_reader :similarity
18
18
  attr_reader :str1_match_initial, :str1_match_final, :str2_match_initial, :str2_match_final
19
19
 
20
- def initialize(str1, str2, mappings = [])
21
- raise ArgumentError, "nil string" if str1.nil? || str2.nil?
22
- mappings ||= []
20
+ def initialize(_str1, _str2)
21
+ raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
22
+
23
+ str1, str2, mappings = string_preprocessing(_str1, _str2)
23
24
 
24
25
  _compute_mixed_alignment(str1, str2, mappings)
25
26
  end
@@ -62,7 +63,7 @@ class TextAlignment::MixedAlignment
62
63
  end
63
64
 
64
65
  cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff)
65
- @similarity = cmp.similarity
66
+ @similarity = compute_similarity(str1, str2, @sdiff)
66
67
  @str1_match_initial = cmp.str1_match_initial
67
68
  @str1_match_final = cmp.str1_match_final
68
69
  @str2_match_initial = cmp.str2_match_initial
@@ -137,4 +138,73 @@ class TextAlignment::MixedAlignment
137
138
  @position_map_begin = posmap_begin.sort.to_h
138
139
  @position_map_end = posmap_end.sort.to_h
139
140
  end
141
+
142
+ private
143
+
144
+ def string_preprocessing(_str1, _str2)
145
+ str1 = _str1.dup
146
+ str2 = _str2.dup
147
+ mappings = TextAlignment::MAPPINGS.dup
148
+
149
+ ## single character mappings
150
+ character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
151
+ characters_from = character_mappings.collect{|m| m[0]}.join
152
+ characters_to = character_mappings.collect{|m| m[1]}.join
153
+ characters_to.gsub!(/-/, '\-')
154
+
155
+ str1.tr!(characters_from, characters_to)
156
+ str2.tr!(characters_from, characters_to)
157
+
158
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
159
+
160
+ ## long to one character mappings
161
+ pletters = TextAlignment::PADDING_LETTERS
162
+
163
+ # find the padding letter for str1
164
+ @padding_letter1 = begin
165
+ i = pletters.index{|l| str2.index(l).nil?}
166
+ raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
167
+ TextAlignment::PADDING_LETTERS[i]
168
+ end
169
+
170
+ # find the padding letter for str2
171
+ @padding_letter2 = begin
172
+ i = pletters.index{|l| l != @padding_letter1 && str1.index(l).nil?}
173
+ raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
174
+ TextAlignment::PADDING_LETTERS[i]
175
+ end
176
+
177
+ # ASCII foldings
178
+ ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
179
+ ascii_foldings.each do |f|
180
+ from = f[1]
181
+
182
+ if str2.index(f[0])
183
+ to = f[0] + (@padding_letter1 * (f[1].length - 1))
184
+ str1.gsub!(from, to)
185
+ end
186
+
187
+ if str1.index(f[0])
188
+ to = f[0] + (@padding_letter2 * (f[1].length - 1))
189
+ str2.gsub!(from, to)
190
+ end
191
+ end
192
+ mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
193
+
194
+ [str1, str2, mappings]
195
+ end
196
+
197
+ def compute_similarity(_s1, _s2, sdiff)
198
+ return 0 if sdiff.nil?
199
+
200
+ # compute the lcs only with non-whitespace letters
201
+ lcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
202
+ return 0 if lcs == 0
203
+
204
+ s1 = _s1.tr(@padding_letter1, ' ')
205
+ s2 = _s2.tr(@padding_letter2, ' ')
206
+
207
+ similarity = lcs / [s1.scan(/\S/).count, s2.scan(/\S/).count].min.to_f
208
+ end
209
+
140
210
  end
@@ -8,30 +8,27 @@ module TextAlignment; end unless defined? TextAlignment
8
8
  TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
9
9
 
10
10
  class TextAlignment::TextAlignment
11
- attr_reader :block_alignments
11
+ attr_reader :block_alignment
12
12
  attr_reader :similarity
13
13
  attr_reader :lost_annotations
14
14
 
15
- def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
- raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
15
+ def initialize(str1, str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
+ raise ArgumentError, "nil string" if str1.nil? || str2.nil?
17
17
 
18
- @ostr1 = _str1
19
- @ostr2 = _str2
20
-
21
- str1, str2, mappings = string_preprocessing(_str1, _str2)
18
+ @block_alignment = {source_text:str1, target_text:str2}
22
19
 
23
20
  # try exact match
24
21
  block_begin = str2.index(str1)
25
22
  unless block_begin.nil?
26
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
27
- return @block_alignments
23
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
24
+ return @block_alignment
28
25
  end
29
26
 
30
27
  # try exact match
31
28
  block_begin = str2.downcase.index(str1.downcase)
32
29
  unless block_begin.nil?
33
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
34
- return @block_alignments
30
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
31
+ return @block_alignment
35
32
  end
36
33
 
37
34
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
@@ -64,7 +61,7 @@ class TextAlignment::TextAlignment
64
61
  # puts
65
62
 
66
63
  ## To find block alignments
67
- @block_alignments = []
64
+ @block_alignment[:blocks] = []
68
65
  return if mblocks.empty?
69
66
 
70
67
  # Initial step
@@ -73,35 +70,35 @@ class TextAlignment::TextAlignment
73
70
  e2 = mblocks[0][:target][:begin]
74
71
 
75
72
  if mblocks[0][:target][:begin] == 0
76
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
73
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
77
74
  else
78
75
  _str1 = str1[0 ... e1]
79
76
  _str2 = str2[0 ... e2]
80
77
 
81
78
  unless _str1.strip.empty?
82
79
  if _str2.strip.empty?
83
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
80
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
84
81
  else
85
82
  len_min = [_str1.length, _str2.length].min
86
83
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
87
84
  b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
88
85
  b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
89
86
 
90
- @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
87
+ @block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
91
88
 
92
89
  _str1 = str1[b1 ... e1]
93
90
  _str2 = str2[b2 ... e2]
94
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
95
- if alignment.similarity < 0.6
96
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
91
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
92
+ if alignment.similarity < 0.5
93
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty, similarity: alignment.similarity}
97
94
  else
98
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
95
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment, similarity: alignment.similarity}
99
96
  end
100
97
  end
101
98
  end
102
99
  end
103
100
  end
104
- @block_alignments << mblocks[0].merge(alignment: :block)
101
+ @block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
105
102
 
106
103
  (1 ... mblocks.length).each do |i|
107
104
  b1 = mblocks[i - 1][:source][:end]
@@ -112,17 +109,17 @@ class TextAlignment::TextAlignment
112
109
  _str2 = str2[b2 ... e2]
113
110
  unless _str1.strip.empty?
114
111
  if _str2.strip.empty?
115
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
112
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
116
113
  else
117
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
118
- if alignment.similarity < 0.6
119
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
114
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
115
+ if alignment.similarity < 0.5
116
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
120
117
  else
121
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
118
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
122
119
  end
123
120
  end
124
121
  end
125
- @block_alignments << mblocks[i].merge(alignment: :block)
122
+ @block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
126
123
  end
127
124
 
128
125
  # Final step
@@ -134,7 +131,7 @@ class TextAlignment::TextAlignment
134
131
 
135
132
  unless _str1.strip.empty?
136
133
  if _str2.strip.empty?
137
- @block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
134
+ @block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
138
135
  else
139
136
  len_min = [_str1.length, _str2.length].min
140
137
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
@@ -143,58 +140,58 @@ class TextAlignment::TextAlignment
143
140
  _str1 = str1[b1 ... e1]
144
141
  _str2 = str2[b2 ... e2]
145
142
 
146
- alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
147
- if alignment.similarity < 0.6
148
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
143
+ alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase)
144
+ if alignment.similarity < 0.5
145
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: alignment.similarity}
149
146
  else
150
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
147
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment, similarity: alignment.similarity}
151
148
  end
152
149
 
153
- @block_alignments << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
150
+ @block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
154
151
  end
155
152
  end
156
153
  end
157
154
 
158
- @block_alignments.each do |a|
155
+ @block_alignment[:blocks].each do |a|
159
156
  a[:delta] = a[:target][:begin] - a[:source][:begin]
160
157
  end
161
158
  end
162
159
 
163
160
  def transform_begin_position(begin_position)
164
- i = @block_alignments.index{|b| b[:source][:end] > begin_position}
165
- block_alignment = @block_alignments[i]
166
-
167
- b = if block_alignment[:alignment] == :block
168
- begin_position + block_alignment[:delta]
169
- elsif block_alignment[:alignment] == :empty
170
- if begin_position == block_alignment[:source][:begin]
171
- block_alignment[:target][:begin]
161
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
162
+ block = @block_alignment[:blocks][i]
163
+
164
+ b = if block[:alignment] == :block
165
+ begin_position + block[:delta]
166
+ elsif block[:alignment] == :empty
167
+ if begin_position == block[:source][:begin]
168
+ block[:target][:begin]
172
169
  else
173
170
  # raise "lost annotation"
174
171
  nil
175
172
  end
176
173
  else
177
- r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
178
- r.nil? ? nil : r + block_alignment[:target][:begin]
174
+ r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
175
+ r.nil? ? nil : r + block[:target][:begin]
179
176
  end
180
177
  end
181
178
 
182
179
  def transform_end_position(end_position)
183
- i = @block_alignments.index{|b| b[:source][:end] >= end_position}
184
- block_alignment = @block_alignments[i]
185
-
186
- e = if block_alignment[:alignment] == :block
187
- end_position + block_alignment[:delta]
188
- elsif block_alignment[:alignment] == :empty
189
- if end_position == block_alignment[:source][:end]
190
- block_alignment[:target][:end]
180
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
181
+ block = @block_alignment[:blocks][i]
182
+
183
+ e = if block[:alignment] == :block
184
+ end_position + block[:delta]
185
+ elsif block[:alignment] == :empty
186
+ if end_position == block[:source][:end]
187
+ block[:target][:end]
191
188
  else
192
189
  # raise "lost annotation"
193
190
  nil
194
191
  end
195
192
  else
196
- r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
197
- r.nil? ? nil : r + block_alignment[:target][:begin]
193
+ r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
194
+ r.nil? ? nil : r + block[:target][:begin]
198
195
  end
199
196
  end
200
197
 
@@ -240,83 +237,22 @@ class TextAlignment::TextAlignment
240
237
  r
241
238
  end
242
239
 
243
- def alignment_table
244
- table = <<-TABLE
245
- <table class='text_alignment_table'>
246
- <thead>
247
- <tr>
248
- <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
- <th class='text_alignment_rigt'>Text 2</th>
250
- </tr>
251
- </thead>
252
- <tbody>
253
- TABLE
254
-
255
- @block_alignments.each do |a|
256
- table += alignment_table_th(a)
257
- table += "<tr>\n" + case a[:alignment]
258
- when :block
259
- "<td colspan='2' class='text_alignment_common'>" +
260
- @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
- "</td>\n"
262
- when :empty
263
- "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
- "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
- else
266
- base = a[:source][:begin]
267
- astr1 = a[:alignment].sdiff.map do |c|
268
- case c.action
269
- when '='
270
- @ostr1[c.old_position + base]
271
- when '+'
272
- '_'
273
- when '-'
274
- @ostr1[c.old_position + base]
275
- when '!'
276
- @ostr1[c.old_position + base] + '_'
277
- end
278
- end.join('')
279
-
280
- base = a[:target][:begin]
281
- astr2 = a[:alignment].sdiff.map do |c|
282
- case c.action
283
- when '='
284
- @ostr2[c.new_position + base]
285
- when '+'
286
- @ostr2[c.new_position + base]
287
- when '-'
288
- '_'
289
- when '!'
290
- '_' + @ostr2[c.new_position + base]
291
- end
292
- end.join('')
293
-
294
- "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
- "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
- end + "</tr>\n"
297
- end
298
- table += '</tbody></table>'
299
- end
300
-
301
- def alignment_table_th(a)
302
- "<tr>" +
303
- "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
- "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
- "</tr>"
306
- end
307
-
308
240
  def alignment_show
241
+ stext = @block_alignment[:source_text]
242
+ ttext = @block_alignment[:target_text]
243
+
309
244
  show = ''
310
- @block_alignments.each do |a|
245
+ @block_alignment[:blocks].each do |a|
311
246
  show += case a[:alignment]
312
247
  when :block
313
- "===== common =====\n" +
314
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
248
+ "===== common ===== [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
249
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
250
  when :empty
316
- "<<<<< string 1\n" +
317
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
318
- ">>>>> string 2\n" +
319
- @ostr2[a[:target][:begin] ... a[:target][:end]] + "\n\n"
251
+ "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
252
+ "<<<<< string 1 [#{a[:source][:begin]} - #{a[:source][:end]}]\n" +
253
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
254
+ ">>>>> string 2 [#{a[:target][:begin]} - #{a[:target][:end]}]\n" +
255
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
320
256
  else
321
257
  astr1 = ''
322
258
  astr2 = ''
@@ -325,13 +261,13 @@ class TextAlignment::TextAlignment
325
261
  astr1 = a[:alignment].sdiff.map do |c|
326
262
  case c.action
327
263
  when '='
328
- @ostr1[c.old_position + base]
264
+ stext[c.old_position + base]
329
265
  when '+'
330
266
  '_'
331
267
  when '-'
332
- @ostr1[c.old_position + base]
268
+ stext[c.old_position + base]
333
269
  when '!'
334
- @ostr1[c.old_position + base] + '_'
270
+ stext[c.old_position + base] + '_'
335
271
  end
336
272
  end.join('')
337
273
 
@@ -339,17 +275,17 @@ class TextAlignment::TextAlignment
339
275
  astr2 = a[:alignment].sdiff.map do |c|
340
276
  case c.action
341
277
  when '='
342
- @ostr2[c.new_position + base]
278
+ ttext[c.new_position + base]
343
279
  when '+'
344
- @ostr2[c.new_position + base]
280
+ ttext[c.new_position + base]
345
281
  when '-'
346
282
  '_'
347
283
  when '!'
348
- '_' + @ostr2[c.new_position + base]
284
+ '_' + ttext[c.new_position + base]
349
285
  end
350
286
  end.join('')
351
287
 
352
- "***** local mismatch\n" +
288
+ "***** local mismatch [#{a[:source][:begin]} - #{a[:source][:end]}] [#{a[:target][:begin]} - #{a[:target][:end]}] (similarity: #{a[:similarity]})\n" +
353
289
  "[#{astr1}]\n" +
354
290
  "[#{astr2}]\n\n"
355
291
  end
@@ -357,59 +293,4 @@ class TextAlignment::TextAlignment
357
293
  show
358
294
  end
359
295
 
360
- private
361
-
362
- def string_preprocessing(_str1, _str2)
363
- str1 = _str1.dup
364
- str2 = _str2.dup
365
- mappings = TextAlignment::MAPPINGS.dup
366
-
367
- ## single character mappings
368
- character_mappings = mappings.select{|m| m[0].length == 1 && m[1].length == 1}
369
- characters_from = character_mappings.collect{|m| m[0]}.join
370
- characters_to = character_mappings.collect{|m| m[1]}.join
371
- characters_to.gsub!(/-/, '\-')
372
-
373
- str1.tr!(characters_from, characters_to)
374
- str2.tr!(characters_from, characters_to)
375
-
376
- mappings.delete_if{|m| m[0].length == 1 && m[1].length == 1}
377
-
378
- ## long to one character mappings
379
- pletters = TextAlignment::PADDING_LETTERS
380
-
381
- # find the padding letter for str1
382
- padding_letter1 = begin
383
- i = pletters.index{|l| str2.index(l).nil?}
384
- raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
385
- TextAlignment::PADDING_LETTERS[i]
386
- end
387
-
388
- # find the padding letter for str2
389
- padding_letter2 = begin
390
- i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
391
- raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
392
- TextAlignment::PADDING_LETTERS[i]
393
- end
394
-
395
- # ASCII foldings
396
- ascii_foldings = mappings.select{|m| m[0].length == 1 && m[1].length > 1}
397
- ascii_foldings.each do |f|
398
- from = f[1]
399
-
400
- if str2.index(f[0])
401
- to = f[0] + (padding_letter1 * (f[1].length - 1))
402
- str1.gsub!(from, to)
403
- end
404
-
405
- if str1.index(f[0])
406
- to = f[0] + (padding_letter2 * (f[1].length - 1))
407
- str2.gsub!(from, to)
408
- end
409
- end
410
- mappings.delete_if{|m| m[0].length == 1 && m[1].length > 1}
411
-
412
- [str1, str2, mappings]
413
- end
414
-
415
296
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.5.1'
2
+ VERSION = '0.6.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-04 00:00:00.000000000 Z
11
+ date: 2020-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary