text_alignment 0.4.3 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52bc948955e2df858e397b14eabb4411f73b3ff1e4d879ff4b7015d3b5e03308
4
- data.tar.gz: fd20caec51c95bdc475e0698a52bb7fdebc9e22c43bb47267a883bcc75862268
3
+ metadata.gz: 9f5f7f27c8628123530d51d0a68060aa6fb850bcef8c7089c8bf990f7257a80b
4
+ data.tar.gz: 45f768df4e7d89c931295985adb31df9e725156e5a85d7f78a5b7cd26d00be4d
5
5
  SHA512:
6
- metadata.gz: dbcb7ab70a64d4a398a5c5761cc5b2f5de6835ccc0e2d0854556f03ef91d0c0294986cc2ff1273788e6b7b0c73dfdf86fd16ee1ef8ce35ecc11d61f8eaab9521
7
- data.tar.gz: 01d21cdcc0ab81d61e08ff1f52360ba35973756fd5060ce866391ff622d4cf87da945dba43a622a0581d63ee96c8723e1cb28991bfa02f4e1e803896bdc64d7f
6
+ metadata.gz: 4004293fa10eb247110764d16a24900a10e714072227dd6e5626d2123deca4bcec00b7255e1affaba4d7dda75d7e42049aabf6b1e6c51f4c005124443b5f9ffc
7
+ data.tar.gz: eb5c8a2c89c8973242bb77457e0f8d9922486d9743e7c39ee12ea4025ece6d888be5308eaddec076e5a21c84149dcb31bc0ed6fcd68881fb6c8c4063e49fb64d
@@ -103,65 +103,56 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
- pp alignment
106
+ # pp alignment
107
107
 
108
108
  # verification
109
- source_text = source_annotations[:text]
110
- puts "=====BEGIN"
111
- (0 ... source_text.rstrip.length).each do |p|
112
- t = alignment.transform_begin_position(p)
113
- if t.nil?
114
- print source_text[p]
115
- else
116
- print '.'
117
- end
118
- end
119
- puts
120
- puts "=====END"
121
-
122
- puts "=====BEGIN"
123
- (0 .. source_text.rstrip.length).each do |p|
124
- t = alignment.transform_end_position(p)
125
- if t.nil?
126
- print source_text[p]
127
- else
128
- print '.'
129
- end
130
- end
131
- puts
132
- puts "=====END"
133
-
134
- # alignment.block_alignments.each do |a|
135
- # if a[:alignment].nil? || a[:alignment] == :empty
136
- # # p [a[:source], a[:target]]
137
- # # p a[:alignment]
109
+ # source_text = source_annotations[:text]
110
+ # puts "=====BEGIN"
111
+ # (0 ... source_text.rstrip.length).each do |p|
112
+ # t = alignment.transform_begin_position(p)
113
+ # if t.nil?
114
+ # print source_text[p]
115
+ # else
116
+ # print '.'
117
+ # end
118
+ # end
119
+ # puts
120
+ # puts "=====END"
121
+
122
+ # puts "=====BEGIN"
123
+ # (0 .. source_text.rstrip.length).each do |p|
124
+ # t = alignment.transform_end_position(p)
125
+ # if t.nil?
126
+ # print source_text[p]
138
127
  # else
139
- # p [a[:source], a[:target]]
140
- # p a[:alignment].similarity
141
- # puts "--"
142
- # puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
143
- # puts "--"
144
- # puts target_text[a[:target][:begin] ... a[:target][:end]]
145
- # puts "======"
128
+ # print '.'
146
129
  # end
147
130
  # end
131
+ # puts
132
+ # puts "=====END"
133
+
134
+ source_text = source_annotations[:text]
135
+
136
+ # puts "[block alignment]"
137
+ puts alignment.alignment_table
138
+ # puts "====="
148
139
  # exit
149
140
 
150
141
  # verification of source denotations
151
- puts "[Invalid source denotations]"
152
- source_annotations[:denotations] do |d|
153
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
- end
155
- puts "====="
156
- puts
142
+ # puts "[Invalid source denotations]"
143
+ # source_annotations[:denotations] do |d|
144
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
145
+ # end
146
+ # puts "====="
147
+ # puts
157
148
 
158
149
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
159
- puts "[Invalid transformation]"
160
- denotations.each do |d|
161
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
162
- end
163
- puts "====="
164
- puts
150
+ # puts "[Invalid transformation]"
151
+ # denotations.each do |d|
152
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
153
+ # end
154
+ # puts "====="
155
+ # puts
165
156
 
166
157
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
167
158
 
@@ -15,6 +15,9 @@ class TextAlignment::TextAlignment
15
15
  def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
16
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
17
 
18
+ @ostr1 = _str1
19
+ @ostr2 = _str2
20
+
18
21
  str1, str2, mappings = string_preprocessing(_str1, _str2)
19
22
 
20
23
  # try exact match
@@ -24,6 +27,13 @@ class TextAlignment::TextAlignment
24
27
  return @block_alignments
25
28
  end
26
29
 
30
+ # try exact match
31
+ block_begin = str2.downcase.index(str1.downcase)
32
+ unless block_begin.nil?
33
+ @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin}]
34
+ return @block_alignments
35
+ end
36
+
27
37
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
28
38
 
29
39
  # To collect matched blocks
@@ -91,7 +101,7 @@ class TextAlignment::TextAlignment
91
101
  end
92
102
  end
93
103
  end
94
- @block_alignments << mblocks[0]
104
+ @block_alignments << mblocks[0].merge(alignment: :block)
95
105
 
96
106
  (1 ... mblocks.length).each do |i|
97
107
  b1 = mblocks[i - 1][:source][:end]
@@ -112,7 +122,7 @@ class TextAlignment::TextAlignment
112
122
  end
113
123
  end
114
124
  end
115
- @block_alignments << mblocks[i]
125
+ @block_alignments << mblocks[i].merge(alignment: :block)
116
126
  end
117
127
 
118
128
  # Final step
@@ -154,7 +164,7 @@ class TextAlignment::TextAlignment
154
164
  i = @block_alignments.index{|b| b[:source][:end] > begin_position}
155
165
  block_alignment = @block_alignments[i]
156
166
 
157
- b = if block_alignment[:alignment].nil?
167
+ b = if block_alignment[:alignment] == :block
158
168
  begin_position + block_alignment[:delta]
159
169
  elsif block_alignment[:alignment] == :empty
160
170
  if begin_position == block_alignment[:source][:begin]
@@ -173,7 +183,7 @@ class TextAlignment::TextAlignment
173
183
  i = @block_alignments.index{|b| b[:source][:end] >= end_position}
174
184
  block_alignment = @block_alignments[i]
175
185
 
176
- e = if block_alignment[:alignment].nil?
186
+ e = if block_alignment[:alignment] == :block
177
187
  end_position + block_alignment[:delta]
178
188
  elsif block_alignment[:alignment] == :empty
179
189
  if end_position == block_alignment[:source][:end]
@@ -230,8 +240,126 @@ class TextAlignment::TextAlignment
230
240
  r
231
241
  end
232
242
 
233
- private
243
+ def alignment_table
244
+ table = <<-TABLE
245
+ <table class='text_alignment_table'>
246
+ <thead>
247
+ <tr>
248
+ <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
+ <th class='text_alignment_rigt'>Text 2</th>
250
+ </tr>
251
+ </thead>
252
+ <tbody>
253
+ TABLE
254
+
255
+ @block_alignments.each do |a|
256
+ table += alignment_table_th(a)
257
+ table += "<tr>\n" + case a[:alignment]
258
+ when :block
259
+ "<td colspan='2' class='text_alignment_common'>" +
260
+ @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
+ "</td>\n"
262
+ when :empty
263
+ "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
+ "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
+ else
266
+ base = a[:source][:begin]
267
+ astr1 = a[:alignment].sdiff.map do |c|
268
+ case c.action
269
+ when '='
270
+ @ostr1[c.old_position + base]
271
+ when '+'
272
+ '_'
273
+ when '-'
274
+ @ostr1[c.old_position + base]
275
+ when '!'
276
+ @ostr1[c.old_position + base] + '_'
277
+ end
278
+ end.join('')
279
+
280
+ base = a[:target][:begin]
281
+ astr2 = a[:alignment].sdiff.map do |c|
282
+ case c.action
283
+ when '='
284
+ @ostr2[c.new_position + base]
285
+ when '+'
286
+ @ostr2[c.new_position + base]
287
+ when '-'
288
+ '_'
289
+ when '!'
290
+ '_' + @ostr2[c.new_position + base]
291
+ end
292
+ end.join('')
293
+
294
+ "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
+ "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
+ end + "</tr>\n"
297
+ end
298
+ table += '</tbody></table>'
299
+ end
300
+
301
+ def alignment_table_th(a)
302
+ "<tr>" +
303
+ "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
+ "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
+ "</tr>"
306
+ end
307
+
308
+ def alignment_show
309
+ show = ''
310
+ @block_alignments.each do |a|
311
+ show += case a[:alignment]
312
+ when :block
313
+ "===== common =====\n" +
314
+ @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
+ when :empty
316
+ puts "<<<<< string 1"
317
+ p @ostr1[a[:source][:begin] ... a[:source][:end]]
318
+ puts
319
+ puts ">>>>> string 2"
320
+ p @ostr2[a[:target][:begin] ... a[:target][:end]]
321
+ puts
322
+ else
323
+ puts "***** local mismatch"
324
+ astr1 = ''
325
+ astr2 = ''
326
+
327
+ base = a[:source][:begin]
328
+ astr1 = a[:alignment].sdiff.map do |c|
329
+ case c.action
330
+ when '='
331
+ @ostr1[c.old_position + base]
332
+ when '+'
333
+ '_'
334
+ when '-'
335
+ @ostr1[c.old_position + base]
336
+ when '!'
337
+ @ostr1[c.old_position + base] + '_'
338
+ end
339
+ end.join('')
340
+
341
+ base = a[:target][:begin]
342
+ astr2 = a[:alignment].sdiff.map do |c|
343
+ case c.action
344
+ when '='
345
+ @ostr2[c.new_position + base]
346
+ when '+'
347
+ @ostr2[c.new_position + base]
348
+ when '-'
349
+ '_'
350
+ when '!'
351
+ '_' + @ostr2[c.new_position + base]
352
+ end
353
+ end.join('')
234
354
 
355
+ puts '[' + astr1 + ']'
356
+ puts '[' + astr2 + ']'
357
+ puts
358
+ end
359
+ end
360
+ end
361
+
362
+ private
235
363
 
236
364
  def string_preprocessing(_str1, _str2)
237
365
  str1 = _str1.dup
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.4.3'
2
+ VERSION = '0.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-02 00:00:00.000000000 Z
11
+ date: 2020-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary