text_alignment 0.6 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc820991f5f694f154b94c369158909ccba3760829e0d881c7fd2e6ef7ddd149
4
- data.tar.gz: 40ae6f2e388405426a77682bd1a3fb7a3c853076eced9b7301b632081dfd0a57
3
+ metadata.gz: fb5dd06236d0b1a8a9c8c5fcb92807a62bdd30e0648bcbd636b95b2a8a45b9b4
4
+ data.tar.gz: 9266b852993bfee999daa92e3f38ec93e2aec77171fee27c1fea6ac2a17e4d23
5
5
  SHA512:
6
- metadata.gz: 5802241b4a8394d3c570c1d4b8f5e1d7706c72852e2d6e6fb23bda2f6e2972fa09f7001695db026667144e2af982eeb91ed0b700bd8151af6df794c98e3c069b
7
- data.tar.gz: 8d7c93acbef6ab12bb2a0291444a7bcc73b0236bb5b0d06d274e95aa30c9ffc829965653b58270686147a9ac30ccf570518b3ad266120b320dfb20cd1620f5f9
6
+ metadata.gz: 7ee2a590fb31bcc27121a4a227d7fcefe2e8e80646bea3898bb86729ca3ca299e0aebcf23bea30e2391687e6ec0d6573c04a4605f728562482c7edbd0c0285e0
7
+ data.tar.gz: 73612c185fe533b0daa22d44e7776ed610025cb1bd874f05d95761079f95d1e8a06ead68c88b84bab4d33e8a676edff1e98880912254d9a7ecb5c4ead5eb01fb
@@ -91,8 +91,9 @@ class TextAlignment::TextAlignment
91
91
  _str1 = str1[b1 ... e1]
92
92
  _str2 = str2[b2 ... e2]
93
93
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
94
- if alignment.similarity < 0.6
95
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
94
+ similarity = alignment_similarity(_str1, _str2, alignment)
95
+ if similarity < 0.6
96
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty, similarity: similarity}
96
97
  else
97
98
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment:alignment}
98
99
  end
@@ -114,8 +115,9 @@ class TextAlignment::TextAlignment
114
115
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
115
116
  else
116
117
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
117
- if alignment.similarity < 0.6
118
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
118
+ similarity = alignment_similarity(_str1, _str2, alignment)
119
+ if similarity < 0.6
120
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: similarity}
119
121
  else
120
122
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
121
123
  end
@@ -143,8 +145,9 @@ class TextAlignment::TextAlignment
143
145
  _str2 = str2[b2 ... e2]
144
146
 
145
147
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
146
- if alignment.similarity < 0.6
147
- @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
148
+ similarity = alignment_similarity(_str1, _str2, alignment)
149
+ if similarity < 0.6
150
+ @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty, similarity: similarity}
148
151
  else
149
152
  @block_alignment[:blocks] << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment:alignment}
150
153
  end
@@ -250,6 +253,7 @@ class TextAlignment::TextAlignment
250
253
  "===== common =====\n" +
251
254
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n"
252
255
  when :empty
256
+ "xxxxx disparate texts (similarity: #{a[:similarity]})\n" +
253
257
  "<<<<< string 1\n" +
254
258
  stext[a[:source][:begin] ... a[:source][:end]] + "\n\n" +
255
259
  ">>>>> string 2\n" +
@@ -316,15 +320,15 @@ class TextAlignment::TextAlignment
316
320
  pletters = TextAlignment::PADDING_LETTERS
317
321
 
318
322
  # find the padding letter for str1
319
- padding_letter1 = begin
323
+ @padding_letter1 = begin
320
324
  i = pletters.index{|l| str2.index(l).nil?}
321
325
  raise RuntimeError, "Could not find a padding letter for str1" if i.nil?
322
326
  TextAlignment::PADDING_LETTERS[i]
323
327
  end
324
328
 
325
329
  # find the padding letter for str2
326
- padding_letter2 = begin
327
- i = pletters.index{|l| l != padding_letter1 && str1.index(l).nil?}
330
+ @padding_letter2 = begin
331
+ i = pletters.index{|l| l != @padding_letter1 && str1.index(l).nil?}
328
332
  raise RuntimeError, "Could not find a padding letter for str2" if i.nil?
329
333
  TextAlignment::PADDING_LETTERS[i]
330
334
  end
@@ -335,12 +339,12 @@ class TextAlignment::TextAlignment
335
339
  from = f[1]
336
340
 
337
341
  if str2.index(f[0])
338
- to = f[0] + (padding_letter1 * (f[1].length - 1))
342
+ to = f[0] + (@padding_letter1 * (f[1].length - 1))
339
343
  str1.gsub!(from, to)
340
344
  end
341
345
 
342
346
  if str1.index(f[0])
343
- to = f[0] + (padding_letter2 * (f[1].length - 1))
347
+ to = f[0] + (@padding_letter2 * (f[1].length - 1))
344
348
  str2.gsub!(from, to)
345
349
  end
346
350
  end
@@ -349,4 +353,13 @@ class TextAlignment::TextAlignment
349
353
  [str1, str2, mappings]
350
354
  end
351
355
 
356
# Computes a similarity score for two text blocks from their alignment,
# looking only at non-whitespace characters.
#
# The score is 2 * matches / (n1 + n2), where
# * matches is the number of '=' entries in alignment.sdiff whose old and
#   new elements are both non-whitespace, and
# * n1 / n2 are the numbers of non-whitespace characters in _s1 / _s2 after
#   occurrences of @padding_letter1 / @padding_letter2 have been replaced
#   by spaces (so padding does not count as text).
#
# _s1, _s2  - the two strings that were aligned.
# alignment - an object responding to #sdiff, whose entries respond to
#             #action, #old_element and #new_element.
#
# Returns a Float. When neither string has a non-whitespace character the
# result is 1.0 rather than NaN (0 / 0.0): there is nothing to disagree on,
# and callers testing `similarity < 0.6` keep taking the same branch that
# NaN made them take.
def alignment_similarity(_s1, _s2, alignment)
  # compute the lcs only with non-whitespace letters
  lcs = alignment.sdiff.count do |d|
    d.action == '=' && /\S/.match?(d.old_element) && /\S/.match?(d.new_element)
  end

  # String#tr raises TypeError on nil, so leave the text untouched while the
  # padding letters have not been chosen yet.
  s1 = @padding_letter1 ? _s1.tr(@padding_letter1, ' ') : _s1
  s2 = @padding_letter2 ? _s2.tr(@padding_letter2, ' ') : _s2

  total = s1.scan(/\S/).count + s2.scan(/\S/).count
  return 1.0 if total.zero?

  2 * lcs / total.to_f
end
364
+
352
365
  end
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.6'
2
+ VERSION = '0.6.1'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.6'
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim