text_alignment 0.4.3 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +40 -49
- data/lib/text_alignment/text_alignment.rb +133 -5
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f5f7f27c8628123530d51d0a68060aa6fb850bcef8c7089c8bf990f7257a80b
|
4
|
+
data.tar.gz: 45f768df4e7d89c931295985adb31df9e725156e5a85d7f78a5b7cd26d00be4d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4004293fa10eb247110764d16a24900a10e714072227dd6e5626d2123deca4bcec00b7255e1affaba4d7dda75d7e42049aabf6b1e6c51f4c005124443b5f9ffc
|
7
|
+
data.tar.gz: eb5c8a2c89c8973242bb77457e0f8d9922486d9743e7c39ee12ea4025ece6d888be5308eaddec076e5a21c84149dcb31bc0ed6fcd68881fb6c8c4063e49fb64d
|
data/bin/align_annotations
CHANGED
@@ -103,65 +103,56 @@ target_annotations = if source_annotations.class == Array
|
|
103
103
|
else
|
104
104
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
105
|
|
106
|
-
pp alignment
|
106
|
+
# pp alignment
|
107
107
|
|
108
108
|
# verification
|
109
|
-
source_text = source_annotations[:text]
|
110
|
-
puts "=====BEGIN"
|
111
|
-
(0 ... source_text.rstrip.length).each do |p|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
119
|
-
puts
|
120
|
-
puts "=====END"
|
121
|
-
|
122
|
-
puts "=====BEGIN"
|
123
|
-
(0 .. source_text.rstrip.length).each do |p|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
else
|
128
|
-
print '.'
|
129
|
-
end
|
130
|
-
end
|
131
|
-
puts
|
132
|
-
puts "=====END"
|
133
|
-
|
134
|
-
# alignment.block_alignments.each do |a|
|
135
|
-
# if a[:alignment].nil? || a[:alignment] == :empty
|
136
|
-
# # p [a[:source], a[:target]]
|
137
|
-
# # p a[:alignment]
|
109
|
+
# source_text = source_annotations[:text]
|
110
|
+
# puts "=====BEGIN"
|
111
|
+
# (0 ... source_text.rstrip.length).each do |p|
|
112
|
+
# t = alignment.transform_begin_position(p)
|
113
|
+
# if t.nil?
|
114
|
+
# print source_text[p]
|
115
|
+
# else
|
116
|
+
# print '.'
|
117
|
+
# end
|
118
|
+
# end
|
119
|
+
# puts
|
120
|
+
# puts "=====END"
|
121
|
+
|
122
|
+
# puts "=====BEGIN"
|
123
|
+
# (0 .. source_text.rstrip.length).each do |p|
|
124
|
+
# t = alignment.transform_end_position(p)
|
125
|
+
# if t.nil?
|
126
|
+
# print source_text[p]
|
138
127
|
# else
|
139
|
-
#
|
140
|
-
# p a[:alignment].similarity
|
141
|
-
# puts "--"
|
142
|
-
# puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
|
143
|
-
# puts "--"
|
144
|
-
# puts target_text[a[:target][:begin] ... a[:target][:end]]
|
145
|
-
# puts "======"
|
128
|
+
# print '.'
|
146
129
|
# end
|
147
130
|
# end
|
131
|
+
# puts
|
132
|
+
# puts "=====END"
|
133
|
+
|
134
|
+
source_text = source_annotations[:text]
|
135
|
+
|
136
|
+
# puts "[block alignment]"
|
137
|
+
puts alignment.alignment_table
|
138
|
+
# puts "====="
|
148
139
|
# exit
|
149
140
|
|
150
141
|
# verification of source denotations
|
151
|
-
puts "[Invalid source denotations]"
|
152
|
-
source_annotations[:denotations] do |d|
|
153
|
-
|
154
|
-
end
|
155
|
-
puts "====="
|
156
|
-
puts
|
142
|
+
# puts "[Invalid source denotations]"
|
143
|
+
# source_annotations[:denotations] do |d|
|
144
|
+
# p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
|
145
|
+
# end
|
146
|
+
# puts "====="
|
147
|
+
# puts
|
157
148
|
|
158
149
|
denotations = alignment.transform_hdenotations(source_annotations[:denotations])
|
159
|
-
puts "[Invalid transformation]"
|
160
|
-
denotations.each do |d|
|
161
|
-
|
162
|
-
end
|
163
|
-
puts "====="
|
164
|
-
puts
|
150
|
+
# puts "[Invalid transformation]"
|
151
|
+
# denotations.each do |d|
|
152
|
+
# p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
|
153
|
+
# end
|
154
|
+
# puts "====="
|
155
|
+
# puts
|
165
156
|
|
166
157
|
lost_annotations += alignment.lost_annotations if alignment.lost_annotations
|
167
158
|
|
@@ -15,6 +15,9 @@ class TextAlignment::TextAlignment
|
|
15
15
|
def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
|
16
16
|
raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
|
17
17
|
|
18
|
+
@ostr1 = _str1
|
19
|
+
@ostr2 = _str2
|
20
|
+
|
18
21
|
str1, str2, mappings = string_preprocessing(_str1, _str2)
|
19
22
|
|
20
23
|
# try exact match
|
@@ -24,6 +27,13 @@ class TextAlignment::TextAlignment
|
|
24
27
|
return @block_alignments
|
25
28
|
end
|
26
29
|
|
30
|
+
# try exact match
|
31
|
+
block_begin = str2.downcase.index(str1.downcase)
|
32
|
+
unless block_begin.nil?
|
33
|
+
@block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin}]
|
34
|
+
return @block_alignments
|
35
|
+
end
|
36
|
+
|
27
37
|
anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
|
28
38
|
|
29
39
|
# To collect matched blocks
|
@@ -91,7 +101,7 @@ class TextAlignment::TextAlignment
|
|
91
101
|
end
|
92
102
|
end
|
93
103
|
end
|
94
|
-
@block_alignments << mblocks[0]
|
104
|
+
@block_alignments << mblocks[0].merge(alignment: :block)
|
95
105
|
|
96
106
|
(1 ... mblocks.length).each do |i|
|
97
107
|
b1 = mblocks[i - 1][:source][:end]
|
@@ -112,7 +122,7 @@ class TextAlignment::TextAlignment
|
|
112
122
|
end
|
113
123
|
end
|
114
124
|
end
|
115
|
-
@block_alignments << mblocks[i]
|
125
|
+
@block_alignments << mblocks[i].merge(alignment: :block)
|
116
126
|
end
|
117
127
|
|
118
128
|
# Final step
|
@@ -154,7 +164,7 @@ class TextAlignment::TextAlignment
|
|
154
164
|
i = @block_alignments.index{|b| b[:source][:end] > begin_position}
|
155
165
|
block_alignment = @block_alignments[i]
|
156
166
|
|
157
|
-
b = if block_alignment[:alignment]
|
167
|
+
b = if block_alignment[:alignment] == :block
|
158
168
|
begin_position + block_alignment[:delta]
|
159
169
|
elsif block_alignment[:alignment] == :empty
|
160
170
|
if begin_position == block_alignment[:source][:begin]
|
@@ -173,7 +183,7 @@ class TextAlignment::TextAlignment
|
|
173
183
|
i = @block_alignments.index{|b| b[:source][:end] >= end_position}
|
174
184
|
block_alignment = @block_alignments[i]
|
175
185
|
|
176
|
-
e = if block_alignment[:alignment]
|
186
|
+
e = if block_alignment[:alignment] == :block
|
177
187
|
end_position + block_alignment[:delta]
|
178
188
|
elsif block_alignment[:alignment] == :empty
|
179
189
|
if end_position == block_alignment[:source][:end]
|
@@ -230,8 +240,126 @@ class TextAlignment::TextAlignment
|
|
230
240
|
r
|
231
241
|
end
|
232
242
|
|
233
|
-
|
243
|
+
def alignment_table
|
244
|
+
table = <<-TABLE
|
245
|
+
<table class='text_alignment_table'>
|
246
|
+
<thead>
|
247
|
+
<tr>
|
248
|
+
<th class='text_alignment_left' style='width:50%'>Text 1</th>
|
249
|
+
<th class='text_alignment_rigt'>Text 2</th>
|
250
|
+
</tr>
|
251
|
+
</thead>
|
252
|
+
<tbody>
|
253
|
+
TABLE
|
254
|
+
|
255
|
+
@block_alignments.each do |a|
|
256
|
+
table += alignment_table_th(a)
|
257
|
+
table += "<tr>\n" + case a[:alignment]
|
258
|
+
when :block
|
259
|
+
"<td colspan='2' class='text_alignment_common'>" +
|
260
|
+
@ostr1[a[:source][:begin] ... a[:source][:end]] +
|
261
|
+
"</td>\n"
|
262
|
+
when :empty
|
263
|
+
"<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
|
264
|
+
"<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
|
265
|
+
else
|
266
|
+
base = a[:source][:begin]
|
267
|
+
astr1 = a[:alignment].sdiff.map do |c|
|
268
|
+
case c.action
|
269
|
+
when '='
|
270
|
+
@ostr1[c.old_position + base]
|
271
|
+
when '+'
|
272
|
+
'_'
|
273
|
+
when '-'
|
274
|
+
@ostr1[c.old_position + base]
|
275
|
+
when '!'
|
276
|
+
@ostr1[c.old_position + base] + '_'
|
277
|
+
end
|
278
|
+
end.join('')
|
279
|
+
|
280
|
+
base = a[:target][:begin]
|
281
|
+
astr2 = a[:alignment].sdiff.map do |c|
|
282
|
+
case c.action
|
283
|
+
when '='
|
284
|
+
@ostr2[c.new_position + base]
|
285
|
+
when '+'
|
286
|
+
@ostr2[c.new_position + base]
|
287
|
+
when '-'
|
288
|
+
'_'
|
289
|
+
when '!'
|
290
|
+
'_' + @ostr2[c.new_position + base]
|
291
|
+
end
|
292
|
+
end.join('')
|
293
|
+
|
294
|
+
"<td class='text_alignment_left'>" + astr1 + "</td>\n" +
|
295
|
+
"<td class='text_alignment_right'>" + astr2 + "</td>\n"
|
296
|
+
end + "</tr>\n"
|
297
|
+
end
|
298
|
+
table += '</tbody></table>'
|
299
|
+
end
|
300
|
+
|
301
|
+
def alignment_table_th(a)
|
302
|
+
"<tr>" +
|
303
|
+
"<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
|
304
|
+
"<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
|
305
|
+
"</tr>"
|
306
|
+
end
|
307
|
+
|
308
|
+
def alignment_show
|
309
|
+
show = ''
|
310
|
+
@block_alignments.each do |a|
|
311
|
+
show += case a[:alignment]
|
312
|
+
when :block
|
313
|
+
"===== common =====\n" +
|
314
|
+
@ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
|
315
|
+
when :empty
|
316
|
+
puts "<<<<< string 1"
|
317
|
+
p @ostr1[a[:source][:begin] ... a[:source][:end]]
|
318
|
+
puts
|
319
|
+
puts ">>>>> string 2"
|
320
|
+
p @ostr2[a[:target][:begin] ... a[:target][:end]]
|
321
|
+
puts
|
322
|
+
else
|
323
|
+
puts "***** local mismatch"
|
324
|
+
astr1 = ''
|
325
|
+
astr2 = ''
|
326
|
+
|
327
|
+
base = a[:source][:begin]
|
328
|
+
astr1 = a[:alignment].sdiff.map do |c|
|
329
|
+
case c.action
|
330
|
+
when '='
|
331
|
+
@ostr1[c.old_position + base]
|
332
|
+
when '+'
|
333
|
+
'_'
|
334
|
+
when '-'
|
335
|
+
@ostr1[c.old_position + base]
|
336
|
+
when '!'
|
337
|
+
@ostr1[c.old_position + base] + '_'
|
338
|
+
end
|
339
|
+
end.join('')
|
340
|
+
|
341
|
+
base = a[:target][:begin]
|
342
|
+
astr2 = a[:alignment].sdiff.map do |c|
|
343
|
+
case c.action
|
344
|
+
when '='
|
345
|
+
@ostr2[c.new_position + base]
|
346
|
+
when '+'
|
347
|
+
@ostr2[c.new_position + base]
|
348
|
+
when '-'
|
349
|
+
'_'
|
350
|
+
when '!'
|
351
|
+
'_' + @ostr2[c.new_position + base]
|
352
|
+
end
|
353
|
+
end.join('')
|
234
354
|
|
355
|
+
puts '[' + astr1 + ']'
|
356
|
+
puts '[' + astr2 + ']'
|
357
|
+
puts
|
358
|
+
end
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
private
|
235
363
|
|
236
364
|
def string_preprocessing(_str1, _str2)
|
237
365
|
str1 = _str1.dup
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|