text_alignment 0.6 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
4
- data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
3
+ metadata.gz: fb5dd06236d0b1a8a9c8c5fcb92807a62bdd30e0648bcbd636b95b2a8a45b9b4
4
+ data.tar.gz: 9266b852993bfee999daa92e3f38ec93e2aec77171fee27c1fea6ac2a17e4d23
5
5
  SHA512:
6
- metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
7
- data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
6
+ metadata.gz: 7ee2a590fb31bcc27121a4a227d7fcefe2e8e80646bea3898bb86729ca3ca299e0aebcf23bea30e2391687e6ec0d6573c04a4605f728562482c7edbd0c0285e0
7
+ data.tar.gz: 73612c185fe533b0daa22d44e7776ed610025cb1bd874f05d95761079f95d1e8a06ead68c88b84bab4d33e8a676edff1e98880912254d9a7ecb5c4ead5eb01fb
@@ -91,8 +91,9 @@ class TextAlignment::TextAlignment
91
91
  _str1 = str1[b1 ... e1]
92
92
  _str2 = str2[b2 ... e2]
93
93
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
94
- if alignment.similarity < 0.6
95
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
94
+ similarity = alignment_similarity(_str1, _str2, alignment)
95
+ if similarity < 0.6
96
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty, similarity: similarity}
96
97
  else
97
98
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
98
99
  end
@@ -114,8 +115,9 @@ class TextAlignment::TextAlignment
114
115
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
115
116
  else
116
117
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
117
- if alignment.similarity < 0.6
118
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
118
+ similarity = alignment_similarity(_str1, _str2, alignment)
119
+ if similarity < 0.6
120
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: similarity}
119
121
  else
120
122
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
121
123
  end
@@ -143,8 +145,9 @@ class TextAlignment::TextAlignment
143
145
  _str2 = str2[b2 ... e2]
144
146
 
145
147
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
146
- if alignment.similarity < 0.6
147
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
148
+ similarity = alignment_similarity(_str1, _str2, alignment)
149
+ if similarity < 0.6
150
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: similarity}
148
151
  else
149
152
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
150
153
  end
@@ -250,6 +253,7 @@ class TextAlignment::TextAlignment
250
253
  "===== common =====\n" +
251
254
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
252
255
  when :empty
256
+ "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
253
257
  "<<<<< string 1\n" +
254
258
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
255
259
  ">>>>> string 2\n" +
@@ -316,15 +320,15 @@ class TextAlignment::TextAlignment
316
320
  pletters = TextAlignment::PADDING_LETTERS
317
321
 
318
322
  # find the padding letter for str1
319
- padding_letter1 = begin
323
+ @padding_letter1 = begin
320
324
  i = pletters.index{|l| str2.index(l).nil?}
321
325
  raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
322
326
  TextAlignment::PADDING_LETTERS[i]
323
327
  end
324
328
 
325
329
  # find the padding letter for str2
326
- padding_letter2 = begin
327
- i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
330
+ @padding_letter2 = begin
331
+ i = pletters.index{|l| l != @padding_letter1 && str1.index(l).nil?}
328
332
  raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
329
333
  TextAlignment::PADDING_LETTERS[i]
330
334
  end
@@ -335,12 +339,12 @@ class TextAlignment::TextAlignment
335
339
  from = f[1]
336
340
 
337
341
  if str2.index(f[0])
338
- to = f[0] + (padding_letter1 * (f[1].length - 1))
342
+ to = f[0] + (@padding_letter1 * (f[1].length - 1))
339
343
  str1.gsub!(from, to)
340
344
  end
341
345
 
342
346
  if str1.index(f[0])
343
- to = f[0] + (padding_letter2 * (f[1].length - 1))
347
+ to = f[0] + (@padding_letter2 * (f[1].length - 1))
344
348
  str2.gsub!(from, to)
345
349
  end
346
350
  end
@@ -349,4 +353,13 @@ class TextAlignment::TextAlignment
349
353
  [str1, str2, mappings]
350
354
  end
351
355
 
356
+ def alignment_similarity(_s1, _s2, alignment)
357
+ # compute the lcs only with non-whitespace letters
358
+ lcs = alignment.sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/ && d.new_element =~ /\S/}
359
+
360
+ s1 = _s1.tr(@padding_letter1, ' ')
361
+ s2 = _s2.tr(@padding_letter2, ' ')
362
+ similarity = 2 * lcs / (s1.scan(/\S/).count + s2.scan(/\S/).count).to_f
363
+ end
364
+
352
365
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.6'
2
+ VERSION = '0.6.1'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.6'
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim