text_alignment 0.5.2 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07e02285cce988b857421b5eae20c6b39394dbd0c904de2f416344cad69b725f
4
- data.tar.gz: 6ccdf9930bc97fc5bc6fc2a2d92f732867744342a6c828eaa3ac8029339f33c3
3
+ metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
4
+ data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
5
5
  SHA512:
6
- metadata.gz: 477e77a9349857cd9ab69b5da4d048dd2b36b1d51a7af742a9d05cb1ee20eba8c6d67cd4882c380f5e2737be1ca0d6fa13c0a81b2c3b257a7f249ccb1f4e589c
7
- data.tar.gz: 904fe737512c8774a03c23ae7229df55dd0727fd1a41d3a73031b96e6458920c78c02c66990577624c830525734918c54a8821d1e700eb55d7c8b871ecd6edf3
6
+ metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
7
+ data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
@@ -35,6 +35,7 @@ class TextAlignment::LCSComparison
35
35
  @str2_match_final = sdiff[match_final].new_position
36
36
  mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
37
37
  @similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
38
+ # @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
38
39
  else
39
40
  @str1_match_initial = 0
40
41
  @str2_match_initial = 0
@@ -8,30 +8,29 @@ module TextAlignment; end unless defined? TextAlignment
8
8
  TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
9
9
 
10
10
  class TextAlignment::TextAlignment
11
- attr_reader :block_alignments
11
+ attr_reader :block_alignment
12
12
  attr_reader :similarity
13
13
  attr_reader :lost_annotations
14
14
 
15
15
  def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
16
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
17
 
18
- @ostr1 = _str1
19
- @ostr2 = _str2
18
+ @block_alignment = {source_text:_str1, target_text:_str2}
20
19
 
21
20
  str1, str2, mappings = string_preprocessing(_str1, _str2)
22
21
 
23
22
  # try exact match
24
23
  block_begin = str2.index(str1)
25
24
  unless block_begin.nil?
26
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
27
- return @block_alignments
25
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
26
+ return @block_alignment
28
27
  end
29
28
 
30
29
  # try exact match
31
30
  block_begin = str2.downcase.index(str1.downcase)
32
31
  unless block_begin.nil?
33
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
34
- return @block_alignments
32
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
33
+ return @block_alignment
35
34
  end
36
35
 
37
36
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
@@ -64,7 +63,7 @@ class TextAlignment::TextAlignment
64
63
  # puts
65
64
 
66
65
  ## To find block alignments
67
- @block_alignments = []
66
+ @block_alignment[:blocks] = []
68
67
  return if mblocks.empty?
69
68
 
70
69
  # Initial step
@@ -73,35 +72,35 @@ class TextAlignment::TextAlignment
73
72
  e2 = mblocks[0][:target][:begin]
74
73
 
75
74
  if mblocks[0][:target][:begin] == 0
76
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
75
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
77
76
  else
78
77
  _str1 = str1[0 ... e1]
79
78
  _str2 = str2[0 ... e2]
80
79
 
81
80
  unless _str1.strip.empty?
82
81
  if _str2.strip.empty?
83
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
82
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
84
83
  else
85
84
  len_min = [_str1.length, _str2.length].min
86
85
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
87
86
  b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
88
87
  b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
89
88
 
90
- @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
89
+ @block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
91
90
 
92
91
  _str1 = str1[b1 ... e1]
93
92
  _str2 = str2[b2 ... e2]
94
93
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
95
94
  if alignment.similarity < 0.6
96
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
95
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
97
96
  else
98
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
97
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
99
98
  end
100
99
  end
101
100
  end
102
101
  end
103
102
  end
104
- @block_alignments << mblocks[0].merge(alignment: :block)
103
+ @block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
105
104
 
106
105
  (1 ... mblocks.length).each do |i|
107
106
  b1 = mblocks[i - 1][:source][:end]
@@ -112,17 +111,17 @@ class TextAlignment::TextAlignment
112
111
  _str2 = str2[b2 ... e2]
113
112
  unless _str1.strip.empty?
114
113
  if _str2.strip.empty?
115
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
114
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
116
115
  else
117
116
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
118
117
  if alignment.similarity < 0.6
119
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
118
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
120
119
  else
121
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
120
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
122
121
  end
123
122
  end
124
123
  end
125
- @block_alignments << mblocks[i].merge(alignment: :block)
124
+ @block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
126
125
  end
127
126
 
128
127
  # Final step
@@ -134,7 +133,7 @@ class TextAlignment::TextAlignment
134
133
 
135
134
  unless _str1.strip.empty?
136
135
  if _str2.strip.empty?
137
- @block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
136
+ @block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
138
137
  else
139
138
  len_min = [_str1.length, _str2.length].min
140
139
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
@@ -145,56 +144,56 @@ class TextAlignment::TextAlignment
145
144
 
146
145
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
147
146
  if alignment.similarity < 0.6
148
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
147
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
149
148
  else
150
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
149
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
151
150
  end
152
151
 
153
- @block_alignments << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
152
+ @block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
154
153
  end
155
154
  end
156
155
  end
157
156
 
158
- @block_alignments.each do |a|
157
+ @block_alignment[:blocks].each do |a|
159
158
  a[:delta] = a[:target][:begin] - a[:source][:begin]
160
159
  end
161
160
  end
162
161
 
163
162
  def transform_begin_position(begin_position)
164
- i = @block_alignments.index{|b| b[:source][:end] > begin_position}
165
- block_alignment = @block_alignments[i]
166
-
167
- b = if block_alignment[:alignment] == :block
168
- begin_position + block_alignment[:delta]
169
- elsif block_alignment[:alignment] == :empty
170
- if begin_position == block_alignment[:source][:begin]
171
- block_alignment[:target][:begin]
163
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
164
+ block = @block_alignment[:blocks][i]
165
+
166
+ b = if block[:alignment] == :block
167
+ begin_position + block[:delta]
168
+ elsif block[:alignment] == :empty
169
+ if begin_position == block[:source][:begin]
170
+ block[:target][:begin]
172
171
  else
173
172
  # raise "lost annotation"
174
173
  nil
175
174
  end
176
175
  else
177
- r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
178
- r.nil? ? nil : r + block_alignment[:target][:begin]
176
+ r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
177
+ r.nil? ? nil : r + block[:target][:begin]
179
178
  end
180
179
  end
181
180
 
182
181
  def transform_end_position(end_position)
183
- i = @block_alignments.index{|b| b[:source][:end] >= end_position}
184
- block_alignment = @block_alignments[i]
185
-
186
- e = if block_alignment[:alignment] == :block
187
- end_position + block_alignment[:delta]
188
- elsif block_alignment[:alignment] == :empty
189
- if end_position == block_alignment[:source][:end]
190
- block_alignment[:target][:end]
182
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
183
+ block = @block_alignment[:blocks][i]
184
+
185
+ e = if block[:alignment] == :block
186
+ end_position + block[:delta]
187
+ elsif block[:alignment] == :empty
188
+ if end_position == block[:source][:end]
189
+ block[:target][:end]
191
190
  else
192
191
  # raise "lost annotation"
193
192
  nil
194
193
  end
195
194
  else
196
- r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
197
- r.nil? ? nil : r + block_alignment[:target][:begin]
195
+ r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
196
+ r.nil? ? nil : r + block[:target][:begin]
198
197
  end
199
198
  end
200
199
 
@@ -240,83 +239,21 @@ class TextAlignment::TextAlignment
240
239
  r
241
240
  end
242
241
 
243
- def alignment_table
244
- table = <<-TABLE
245
- <table class='text_alignment_table'>
246
- <thead>
247
- <tr>
248
- <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
- <th class='text_alignment_rigt'>Text 2</th>
250
- </tr>
251
- </thead>
252
- <tbody>
253
- TABLE
254
-
255
- @block_alignments.each do |a|
256
- table += alignment_table_th(a)
257
- table += "<tr>\n" + case a[:alignment]
258
- when :block
259
- "<td colspan='2' class='text_alignment_common'>" +
260
- @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
- "</td>\n"
262
- when :empty
263
- "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
- "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
- else
266
- base = a[:source][:begin]
267
- astr1 = a[:alignment].sdiff.map do |c|
268
- case c.action
269
- when '='
270
- @ostr1[c.old_position + base]
271
- when '+'
272
- '_'
273
- when '-'
274
- @ostr1[c.old_position + base]
275
- when '!'
276
- @ostr1[c.old_position + base] + '_'
277
- end
278
- end.join('')
279
-
280
- base = a[:target][:begin]
281
- astr2 = a[:alignment].sdiff.map do |c|
282
- case c.action
283
- when '='
284
- @ostr2[c.new_position + base]
285
- when '+'
286
- @ostr2[c.new_position + base]
287
- when '-'
288
- '_'
289
- when '!'
290
- '_' + @ostr2[c.new_position + base]
291
- end
292
- end.join('')
293
-
294
- "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
- "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
- end + "</tr>\n"
297
- end
298
- table += '</tbody></table>'
299
- end
300
-
301
- def alignment_table_th(a)
302
- "<tr>" +
303
- "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
- "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
- "</tr>"
306
- end
307
-
308
242
  def alignment_show
243
+ stext = @block_alignment[:source_text]
244
+ ttext = @block_alignment[:target_text]
245
+
309
246
  show = ''
310
- @block_alignments.each do |a|
247
+ @block_alignment[:blocks].each do |a|
311
248
  show += case a[:alignment]
312
249
  when :block
313
250
  "===== common =====\n" +
314
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
251
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
252
  when :empty
316
253
  "<<<<< string 1\n" +
317
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
254
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
318
255
  ">>>>> string 2\n" +
319
- @ostr2[a[:target][:begin] ... a[:target][:end]] + "\n\n"
256
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
320
257
  else
321
258
  astr1 = ''
322
259
  astr2 = ''
@@ -325,13 +262,13 @@ class TextAlignment::TextAlignment
325
262
  astr1 = a[:alignment].sdiff.map do |c|
326
263
  case c.action
327
264
  when '='
328
- @ostr1[c.old_position + base]
265
+ stext[c.old_position + base]
329
266
  when '+'
330
267
  '_'
331
268
  when '-'
332
- @ostr1[c.old_position + base]
269
+ stext[c.old_position + base]
333
270
  when '!'
334
- @ostr1[c.old_position + base] + '_'
271
+ stext[c.old_position + base] + '_'
335
272
  end
336
273
  end.join('')
337
274
 
@@ -339,13 +276,13 @@ class TextAlignment::TextAlignment
339
276
  astr2 = a[:alignment].sdiff.map do |c|
340
277
  case c.action
341
278
  when '='
342
- @ostr2[c.new_position + base]
279
+ ttext[c.new_position + base]
343
280
  when '+'
344
- @ostr2[c.new_position + base]
281
+ ttext[c.new_position + base]
345
282
  when '-'
346
283
  '_'
347
284
  when '!'
348
- '_' + @ostr2[c.new_position + base]
285
+ '_' + ttext[c.new_position + base]
349
286
  end
350
287
  end.join('')
351
288
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.5.2'
2
+ VERSION = '0.6'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-05 00:00:00.000000000 Z
11
+ date: 2020-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary