text_alignment 0.4.3 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52bc948955e2df858e397b14eabb4411f73b3ff1e4d879ff4b7015d3b5e03308
4
- data.tar.gz: fd20caec51c95bdc475e0698a52bb7fdebc9e22c43bb47267a883bcc75862268
3
+ metadata.gz: 9f5f7f27c8628123530d51d0a68060aa6fb850bcef8c7089c8bf990f7257a80b
4
+ data.tar.gz: 45f768df4e7d89c931295985adb31df9e725156e5a85d7f78a5b7cd26d00be4d
5
5
  SHA512:
6
- metadata.gz: dbcb7ab70a64d4a398a5c5761cc5b2f5de6835ccc0e2d0854556f03ef91d0c0294986cc2ff1273788e6b7b0c73dfdf86fd16ee1ef8ce35ecc11d61f8eaab9521
7
- data.tar.gz: 01d21cdcc0ab81d61e08ff1f52360ba35973756fd5060ce866391ff622d4cf87da945dba43a622a0581d63ee96c8723e1cb28991bfa02f4e1e803896bdc64d7f
6
+ metadata.gz: 4004293fa10eb247110764d16a24900a10e714072227dd6e5626d2123deca4bcec00b7255e1affaba4d7dda75d7e42049aabf6b1e6c51f4c005124443b5f9ffc
7
+ data.tar.gz: eb5c8a2c89c8973242bb77457e0f8d9922486d9743e7c39ee12ea4025ece6d888be5308eaddec076e5a21c84149dcb31bc0ed6fcd68881fb6c8c4063e49fb64d
@@ -103,65 +103,56 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
- pp alignment
106
+ # pp alignment
107
107
 
108
108
  # verification
109
- source_text = source_annotations[:text]
110
- puts "=====BEGIN"
111
- (0 ... source_text.rstrip.length).each do |p|
112
- t = alignment.transform_begin_position(p)
113
- if t.nil?
114
- print source_text[p]
115
- else
116
- print '.'
117
- end
118
- end
119
- puts
120
- puts "=====END"
121
-
122
- puts "=====BEGIN"
123
- (0 .. source_text.rstrip.length).each do |p|
124
- t = alignment.transform_end_position(p)
125
- if t.nil?
126
- print source_text[p]
127
- else
128
- print '.'
129
- end
130
- end
131
- puts
132
- puts "=====END"
133
-
134
- # alignment.block_alignments.each do |a|
135
- # if a[:alignment].nil? || a[:alignment] == :empty
136
- # # p [a[:source], a[:target]]
137
- # # p a[:alignment]
109
+ # source_text = source_annotations[:text]
110
+ # puts "=====BEGIN"
111
+ # (0 ... source_text.rstrip.length).each do |p|
112
+ # t = alignment.transform_begin_position(p)
113
+ # if t.nil?
114
+ # print source_text[p]
115
+ # else
116
+ # print '.'
117
+ # end
118
+ # end
119
+ # puts
120
+ # puts "=====END"
121
+
122
+ # puts "=====BEGIN"
123
+ # (0 .. source_text.rstrip.length).each do |p|
124
+ # t = alignment.transform_end_position(p)
125
+ # if t.nil?
126
+ # print source_text[p]
138
127
  # else
139
- # p [a[:source], a[:target]]
140
- # p a[:alignment].similarity
141
- # puts "--"
142
- # puts source_annotations[:text][a[:source][:begin] ... a[:source][:end]]
143
- # puts "--"
144
- # puts target_text[a[:target][:begin] ... a[:target][:end]]
145
- # puts "======"
128
+ # print '.'
146
129
  # end
147
130
  # end
131
+ # puts
132
+ # puts "=====END"
133
+
134
+ source_text = source_annotations[:text]
135
+
136
+ # puts "[block alignment]"
137
+ puts alignment.alignment_table
138
+ # puts "====="
148
139
  # exit
149
140
 
150
141
  # verification of source denotations
151
- puts "[Invalid source denotations]"
152
- source_annotations[:denotations] do |d|
153
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
154
- end
155
- puts "====="
156
- puts
142
+ # puts "[Invalid source denotations]"
143
+ # source_annotations[:denotations] do |d|
144
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
145
+ # end
146
+ # puts "====="
147
+ # puts
157
148
 
158
149
  denotations = alignment.transform_hdenotations(source_annotations[:denotations])
159
- puts "[Invalid transformation]"
160
- denotations.each do |d|
161
- p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
162
- end
163
- puts "====="
164
- puts
150
+ # puts "[Invalid transformation]"
151
+ # denotations.each do |d|
152
+ # p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
153
+ # end
154
+ # puts "====="
155
+ # puts
165
156
 
166
157
  lost_annotations += alignment.lost_annotations if alignment.lost_annotations
167
158
 
@@ -15,6 +15,9 @@ class TextAlignment::TextAlignment
15
15
  def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
16
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
17
 
18
+ @ostr1 = _str1
19
+ @ostr2 = _str2
20
+
18
21
  str1, str2, mappings = string_preprocessing(_str1, _str2)
19
22
 
20
23
  # try exact match
@@ -24,6 +27,13 @@ class TextAlignment::TextAlignment
24
27
  return @block_alignments
25
28
  end
26
29
 
30
+ # try exact match
31
+ block_begin = str2.downcase.index(str1.downcase)
32
+ unless block_begin.nil?
33
+ @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin}]
34
+ return @block_alignments
35
+ end
36
+
27
37
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
28
38
 
29
39
  # To collect matched blocks
@@ -91,7 +101,7 @@ class TextAlignment::TextAlignment
91
101
  end
92
102
  end
93
103
  end
94
- @block_alignments << mblocks[0]
104
+ @block_alignments << mblocks[0].merge(alignment: :block)
95
105
 
96
106
  (1 ... mblocks.length).each do |i|
97
107
  b1 = mblocks[i - 1][:source][:end]
@@ -112,7 +122,7 @@ class TextAlignment::TextAlignment
112
122
  end
113
123
  end
114
124
  end
115
- @block_alignments << mblocks[i]
125
+ @block_alignments << mblocks[i].merge(alignment: :block)
116
126
  end
117
127
 
118
128
  # Final step
@@ -154,7 +164,7 @@ class TextAlignment::TextAlignment
154
164
  i = @block_alignments.index{|b| b[:source][:end] > begin_position}
155
165
  block_alignment = @block_alignments[i]
156
166
 
157
- b = if block_alignment[:alignment].nil?
167
+ b = if block_alignment[:alignment] == :block
158
168
  begin_position + block_alignment[:delta]
159
169
  elsif block_alignment[:alignment] == :empty
160
170
  if begin_position == block_alignment[:source][:begin]
@@ -173,7 +183,7 @@ class TextAlignment::TextAlignment
173
183
  i = @block_alignments.index{|b| b[:source][:end] >= end_position}
174
184
  block_alignment = @block_alignments[i]
175
185
 
176
- e = if block_alignment[:alignment].nil?
186
+ e = if block_alignment[:alignment] == :block
177
187
  end_position + block_alignment[:delta]
178
188
  elsif block_alignment[:alignment] == :empty
179
189
  if end_position == block_alignment[:source][:end]
@@ -230,8 +240,126 @@ class TextAlignment::TextAlignment
230
240
  r
231
241
  end
232
242
 
233
- private
243
+ def alignment_table
244
+ table = <<-TABLE
245
+ <table class='text_alignment_table'>
246
+ <thead>
247
+ <tr>
248
+ <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
+ <th class='text_alignment_rigt'>Text 2</th>
250
+ </tr>
251
+ </thead>
252
+ <tbody>
253
+ TABLE
254
+
255
+ @block_alignments.each do |a|
256
+ table += alignment_table_th(a)
257
+ table += "<tr>\n" + case a[:alignment]
258
+ when :block
259
+ "<td colspan='2' class='text_alignment_common'>" +
260
+ @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
+ "</td>\n"
262
+ when :empty
263
+ "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
+ "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
+ else
266
+ base = a[:source][:begin]
267
+ astr1 = a[:alignment].sdiff.map do |c|
268
+ case c.action
269
+ when '='
270
+ @ostr1[c.old_position + base]
271
+ when '+'
272
+ '_'
273
+ when '-'
274
+ @ostr1[c.old_position + base]
275
+ when '!'
276
+ @ostr1[c.old_position + base] + '_'
277
+ end
278
+ end.join('')
279
+
280
+ base = a[:target][:begin]
281
+ astr2 = a[:alignment].sdiff.map do |c|
282
+ case c.action
283
+ when '='
284
+ @ostr2[c.new_position + base]
285
+ when '+'
286
+ @ostr2[c.new_position + base]
287
+ when '-'
288
+ '_'
289
+ when '!'
290
+ '_' + @ostr2[c.new_position + base]
291
+ end
292
+ end.join('')
293
+
294
+ "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
+ "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
+ end + "</tr>\n"
297
+ end
298
+ table += '</tbody></table>'
299
+ end
300
+
301
+ def alignment_table_th(a)
302
+ "<tr>" +
303
+ "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
+ "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
+ "</tr>"
306
+ end
307
+
308
+ def alignment_show
309
+ show = ''
310
+ @block_alignments.each do |a|
311
+ show += case a[:alignment]
312
+ when :block
313
+ "===== common =====\n" +
314
+ @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
+ when :empty
316
+ puts "<<<<< string 1"
317
+ p @ostr1[a[:source][:begin] ... a[:source][:end]]
318
+ puts
319
+ puts ">>>>> string 2"
320
+ p @ostr2[a[:target][:begin] ... a[:target][:end]]
321
+ puts
322
+ else
323
+ puts "***** local mismatch"
324
+ astr1 = ''
325
+ astr2 = ''
326
+
327
+ base = a[:source][:begin]
328
+ astr1 = a[:alignment].sdiff.map do |c|
329
+ case c.action
330
+ when '='
331
+ @ostr1[c.old_position + base]
332
+ when '+'
333
+ '_'
334
+ when '-'
335
+ @ostr1[c.old_position + base]
336
+ when '!'
337
+ @ostr1[c.old_position + base] + '_'
338
+ end
339
+ end.join('')
340
+
341
+ base = a[:target][:begin]
342
+ astr2 = a[:alignment].sdiff.map do |c|
343
+ case c.action
344
+ when '='
345
+ @ostr2[c.new_position + base]
346
+ when '+'
347
+ @ostr2[c.new_position + base]
348
+ when '-'
349
+ '_'
350
+ when '!'
351
+ '_' + @ostr2[c.new_position + base]
352
+ end
353
+ end.join('')
234
354
 
355
+ puts '[' + astr1 + ']'
356
+ puts '[' + astr2 + ']'
357
+ puts
358
+ end
359
+ end
360
+ end
361
+
362
+ private
235
363
 
236
364
  def string_preprocessing(_str1, _str2)
237
365
  str1 = _str1.dup
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.4.3'
2
+ VERSION = '0.5'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-02 00:00:00.000000000 Z
11
+ date: 2020-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary