text_alignment 0.5.2 → 0.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07e02285cce988b857421b5eae20c6b39394dbd0c904de2f416344cad69b725f
4
- data.tar.gz: 6ccdf9930bc97fc5bc6fc2a2d92f732867744342a6c828eaa3ac8029339f33c3
3
+ metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
4
+ data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
5
5
  SHA512:
6
- metadata.gz: 477e77a9349857cd9ab69b5da4d048dd2b36b1d51a7af742a9d05cb1ee20eba8c6d67cd4882c380f5e2737be1ca0d6fa13c0a81b2c3b257a7f249ccb1f4e589c
7
- data.tar.gz: 904fe737512c8774a03c23ae7229df55dd0727fd1a41d3a73031b96e6458920c78c02c66990577624c830525734918c54a8821d1e700eb55d7c8b871ecd6edf3
6
+ metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
7
+ data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
@@ -35,6 +35,7 @@ class TextAlignment::LCSComparison
35
35
  @str2_match_final = sdiff[match_final].new_position
36
36
  mlcs = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
37
37
  @similarity = 2 * mlcs / (str1[@str1_match_initial .. @str1_match_final].scan(/\S/).count + str2[@str2_match_initial .. @str2_match_final].scan(/\S/).count).to_f
38
+ # @similarity = 2 * lcs / (str1[@str1_match_initial .. @str1_match_final].length + str2[@str2_match_initial .. @str2_match_final].length).to_f
38
39
  else
39
40
  @str1_match_initial = 0
40
41
  @str2_match_initial = 0
@@ -8,30 +8,29 @@ module TextAlignment; end unless defined? TextAlignment
8
8
  TextAlignment::PADDING_LETTERS = ['@', '^', '|', '#', '$', '%', '&', '_'] unless defined? TextAlignment::PADDING_LETTERS
9
9
 
10
10
  class TextAlignment::TextAlignment
11
- attr_reader :block_alignments
11
+ attr_reader :block_alignment
12
12
  attr_reader :similarity
13
13
  attr_reader :lost_annotations
14
14
 
15
15
  def initialize(_str1, _str2, _size_ngram = nil, _size_window = nil, _text_similiarity_threshold = nil)
16
16
  raise ArgumentError, "nil string" if _str1.nil? || _str2.nil?
17
17
 
18
- @ostr1 = _str1
19
- @ostr2 = _str2
18
+ @block_alignment = {source_text:_str1, target_text:_str2}
20
19
 
21
20
  str1, str2, mappings = string_preprocessing(_str1, _str2)
22
21
 
23
22
  # try exact match
24
23
  block_begin = str2.index(str1)
25
24
  unless block_begin.nil?
26
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
27
- return @block_alignments
25
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
26
+ return @block_alignment
28
27
  end
29
28
 
30
29
  # try exact match
31
30
  block_begin = str2.downcase.index(str1.downcase)
32
31
  unless block_begin.nil?
33
- @block_alignments = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
34
- return @block_alignments
32
+ @block_alignment[:blocks] = [{source:{begin:0, end:str1.length}, target:{begin:block_begin, end:block_begin + str1.length}, delta:block_begin, alignment: :block}]
33
+ return @block_alignment
35
34
  end
36
35
 
37
36
  anchor_finder = TextAlignment::AnchorFinder.new(str1, str2, _size_ngram, _size_window, _text_similiarity_threshold)
@@ -64,7 +63,7 @@ class TextAlignment::TextAlignment
64
63
  # puts
65
64
 
66
65
  ## To find block alignments
67
- @block_alignments = []
66
+ @block_alignment[:blocks] = []
68
67
  return if mblocks.empty?
69
68
 
70
69
  # Initial step
@@ -73,35 +72,35 @@ class TextAlignment::TextAlignment
73
72
  e2 = mblocks[0][:target][:begin]
74
73
 
75
74
  if mblocks[0][:target][:begin] == 0
76
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
75
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:0}, alignment: :empty}
77
76
  else
78
77
  _str1 = str1[0 ... e1]
79
78
  _str2 = str2[0 ... e2]
80
79
 
81
80
  unless _str1.strip.empty?
82
81
  if _str2.strip.empty?
83
- @block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
82
+ @block_alignment[:blocks] << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
84
83
  else
85
84
  len_min = [_str1.length, _str2.length].min
86
85
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
87
86
  b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
88
87
  b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
89
88
 
90
- @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
89
+ @block_alignment[:blocks] << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
91
90
 
92
91
  _str1 = str1[b1 ... e1]
93
92
  _str2 = str2[b2 ... e2]
94
93
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
95
94
  if alignment.similarity < 0.6
96
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
95
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
97
96
  else
98
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
97
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
99
98
  end
100
99
  end
101
100
  end
102
101
  end
103
102
  end
104
- @block_alignments << mblocks[0].merge(alignment: :block)
103
+ @block_alignment[:blocks] << mblocks[0].merge(alignment: :block)
105
104
 
106
105
  (1 ... mblocks.length).each do |i|
107
106
  b1 = mblocks[i - 1][:source][:end]
@@ -112,17 +111,17 @@ class TextAlignment::TextAlignment
112
111
  _str2 = str2[b2 ... e2]
113
112
  unless _str1.strip.empty?
114
113
  if _str2.strip.empty?
115
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
114
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
116
115
  else
117
116
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
118
117
  if alignment.similarity < 0.6
119
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
118
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
120
119
  else
121
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
120
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
122
121
  end
123
122
  end
124
123
  end
125
- @block_alignments << mblocks[i].merge(alignment: :block)
124
+ @block_alignment[:blocks] << mblocks[i].merge(alignment: :block)
126
125
  end
127
126
 
128
127
  # Final step
@@ -134,7 +133,7 @@ class TextAlignment::TextAlignment
134
133
 
135
134
  unless _str1.strip.empty?
136
135
  if _str2.strip.empty?
137
- @block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
136
+ @block_alignment[:blocks] << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
138
137
  else
139
138
  len_min = [_str1.length, _str2.length].min
140
139
  len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
@@ -145,56 +144,56 @@ class TextAlignment::TextAlignment
145
144
 
146
145
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
147
146
  if alignment.similarity < 0.6
148
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
147
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
149
148
  else
150
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
149
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
151
150
  end
152
151
 
153
- @block_alignments << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
152
+ @block_alignment[:blocks] << {source:{begin:e1, end:-1}, target:{begin:e2, end:-1}, alignment: :empty} if e1 < str1.length
154
153
  end
155
154
  end
156
155
  end
157
156
 
158
- @block_alignments.each do |a|
157
+ @block_alignment[:blocks].each do |a|
159
158
  a[:delta] = a[:target][:begin] - a[:source][:begin]
160
159
  end
161
160
  end
162
161
 
163
162
  def transform_begin_position(begin_position)
164
- i = @block_alignments.index{|b| b[:source][:end] > begin_position}
165
- block_alignment = @block_alignments[i]
166
-
167
- b = if block_alignment[:alignment] == :block
168
- begin_position + block_alignment[:delta]
169
- elsif block_alignment[:alignment] == :empty
170
- if begin_position == block_alignment[:source][:begin]
171
- block_alignment[:target][:begin]
163
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] > begin_position}
164
+ block = @block_alignment[:blocks][i]
165
+
166
+ b = if block[:alignment] == :block
167
+ begin_position + block[:delta]
168
+ elsif block[:alignment] == :empty
169
+ if begin_position == block[:source][:begin]
170
+ block[:target][:begin]
172
171
  else
173
172
  # raise "lost annotation"
174
173
  nil
175
174
  end
176
175
  else
177
- r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
178
- r.nil? ? nil : r + block_alignment[:target][:begin]
176
+ r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
177
+ r.nil? ? nil : r + block[:target][:begin]
179
178
  end
180
179
  end
181
180
 
182
181
  def transform_end_position(end_position)
183
- i = @block_alignments.index{|b| b[:source][:end] >= end_position}
184
- block_alignment = @block_alignments[i]
185
-
186
- e = if block_alignment[:alignment] == :block
187
- end_position + block_alignment[:delta]
188
- elsif block_alignment[:alignment] == :empty
189
- if end_position == block_alignment[:source][:end]
190
- block_alignment[:target][:end]
182
+ i = @block_alignment[:blocks].index{|b| b[:source][:end] >= end_position}
183
+ block = @block_alignment[:blocks][i]
184
+
185
+ e = if block[:alignment] == :block
186
+ end_position + block[:delta]
187
+ elsif block[:alignment] == :empty
188
+ if end_position == block[:source][:end]
189
+ block[:target][:end]
191
190
  else
192
191
  # raise "lost annotation"
193
192
  nil
194
193
  end
195
194
  else
196
- r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
197
- r.nil? ? nil : r + block_alignment[:target][:begin]
195
+ r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
196
+ r.nil? ? nil : r + block[:target][:begin]
198
197
  end
199
198
  end
200
199
 
@@ -240,83 +239,21 @@ class TextAlignment::TextAlignment
240
239
  r
241
240
  end
242
241
 
243
- def alignment_table
244
- table = <<-TABLE
245
- <table class='text_alignment_table'>
246
- <thead>
247
- <tr>
248
- <th class='text_alignment_left' style='width:50%'>Text 1</th>
249
- <th class='text_alignment_rigt'>Text 2</th>
250
- </tr>
251
- </thead>
252
- <tbody>
253
- TABLE
254
-
255
- @block_alignments.each do |a|
256
- table += alignment_table_th(a)
257
- table += "<tr>\n" + case a[:alignment]
258
- when :block
259
- "<td colspan='2' class='text_alignment_common'>" +
260
- @ostr1[a[:source][:begin] ... a[:source][:end]] +
261
- "</td>\n"
262
- when :empty
263
- "<td class='text_alignment_left'>" + @ostr1[a[:source][:begin] ... a[:source][:end]] + "</td>\n" +
264
- "<td class='text_alignment_right'>" + @ostr2[a[:target][:begin] ... a[:target][:end]] + "</td>\n"
265
- else
266
- base = a[:source][:begin]
267
- astr1 = a[:alignment].sdiff.map do |c|
268
- case c.action
269
- when '='
270
- @ostr1[c.old_position + base]
271
- when '+'
272
- '_'
273
- when '-'
274
- @ostr1[c.old_position + base]
275
- when '!'
276
- @ostr1[c.old_position + base] + '_'
277
- end
278
- end.join('')
279
-
280
- base = a[:target][:begin]
281
- astr2 = a[:alignment].sdiff.map do |c|
282
- case c.action
283
- when '='
284
- @ostr2[c.new_position + base]
285
- when '+'
286
- @ostr2[c.new_position + base]
287
- when '-'
288
- '_'
289
- when '!'
290
- '_' + @ostr2[c.new_position + base]
291
- end
292
- end.join('')
293
-
294
- "<td class='text_alignment_left'>" + astr1 + "</td>\n" +
295
- "<td class='text_alignment_right'>" + astr2 + "</td>\n"
296
- end + "</tr>\n"
297
- end
298
- table += '</tbody></table>'
299
- end
300
-
301
- def alignment_table_th(a)
302
- "<tr>" +
303
- "<th class='text_alignment_left'>#{a[:source][:begin]} - #{a[:source][:end]}</th>" +
304
- "<th class='text_alignment_right'>#{a[:target][:begin]} - #{a[:target][:end]}</th>" +
305
- "</tr>"
306
- end
307
-
308
242
  def alignment_show
243
+ stext = @block_alignment[:source_text]
244
+ ttext = @block_alignment[:target_text]
245
+
309
246
  show = ''
310
- @block_alignments.each do |a|
247
+ @block_alignment[:blocks].each do |a|
311
248
  show += case a[:alignment]
312
249
  when :block
313
250
  "===== common =====\n" +
314
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n"
251
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
315
252
  when :empty
316
253
  "<<<<< string 1\n" +
317
- @ostr1[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
254
+ stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
318
255
  ">>>>> string 2\n" +
319
- @ostr2[a[:target][:begin] ... a[:target][:end]] + "\n\n"
256
+ ttext[a[:target][:begin] ... a[:target][:end]] + "\n\n"
320
257
  else
321
258
  astr1 = ''
322
259
  astr2 = ''
@@ -325,13 +262,13 @@ class TextAlignment::TextAlignment
325
262
  astr1 = a[:alignment].sdiff.map do |c|
326
263
  case c.action
327
264
  when '='
328
- @ostr1[c.old_position + base]
265
+ stext[c.old_position + base]
329
266
  when '+'
330
267
  '_'
331
268
  when '-'
332
- @ostr1[c.old_position + base]
269
+ stext[c.old_position + base]
333
270
  when '!'
334
- @ostr1[c.old_position + base] + '_'
271
+ stext[c.old_position + base] + '_'
335
272
  end
336
273
  end.join('')
337
274
 
@@ -339,13 +276,13 @@ class TextAlignment::TextAlignment
339
276
  astr2 = a[:alignment].sdiff.map do |c|
340
277
  case c.action
341
278
  when '='
342
- @ostr2[c.new_position + base]
279
+ ttext[c.new_position + base]
343
280
  when '+'
344
- @ostr2[c.new_position + base]
281
+ ttext[c.new_position + base]
345
282
  when '-'
346
283
  '_'
347
284
  when '!'
348
- '_' + @ostr2[c.new_position + base]
285
+ '_' + ttext[c.new_position + base]
349
286
  end
350
287
  end.join('')
351
288
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.5.2'
2
+ VERSION = '0.6'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-05 00:00:00.000000000 Z
11
+ date: 2020-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary