text_alignment 0.3.17 → 0.3.22

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fc0292470086de4cc05275d8d19dcd534b1bfe66c5560d2043451530a657c5e
4
- data.tar.gz: c6eee42e0b2b2111f063abd4bd37cbd83a6eff7c236eb032832a23fe77bb7ed5
3
+ metadata.gz: aa961ae295a43e3912878dab34fac18a8cbca395eea999a781a538ce17c61bf3
4
+ data.tar.gz: 38ae815d192104b9bff4664c5749910675ae4e5d9c895fa63c26fdc24f6d7b91
5
5
  SHA512:
6
- metadata.gz: 22d172f3fdaa4549edd4f9e4541c03b947c6ae8bab61e46f55c0378120b632639c70d7a30361b321065a978be6930844aedbce6c5e7591521c016c429f4eeb14
7
- data.tar.gz: 3babca5408aa2dd7ce23e40dbb7909cb4b60f492583c48cb9a6fe085f84e7cf1d4f9b07ea3942455ca984c98dc3d91fce425b8dc116c98a5b8d0ff753fc232c6
6
+ metadata.gz: 185bdcd0298932a09795723085734d79aaddf638a45c871ff1d29e6c89e02041ddd12f848e3350244b27ac050adc7c52f7e1dedfe6664159d781c25d6dd55f1b
7
+ data.tar.gz: 0b88cd8aff0f529b5cad32a360aba476e90d525bc77f89c5c5c59dcef5a6d9077c849acdf9f289f61fea2a50ebe927fd9a53c1eb0bdca9c7661900afdaa8fb94
@@ -108,7 +108,7 @@ else
108
108
  # verification
109
109
  source_text = source_annotations[:text]
110
110
  puts "=====BEGIN"
111
- (0 ... source_text.length).each do |p|
111
+ (0 ... source_text.rstrip.length).each do |p|
112
112
  t = alignment.transform_begin_position(p)
113
113
  if t.nil?
114
114
  print source_text[p]
@@ -120,7 +120,7 @@ else
120
120
  puts "=====END"
121
121
 
122
122
  puts "=====BEGIN"
123
- (0 .. source_text.length).each do |p|
123
+ (0 .. source_text.rstrip.length).each do |p|
124
124
  t = alignment.transform_end_position(p)
125
125
  if t.nil?
126
126
  print source_text[p]
@@ -3,7 +3,7 @@ require 'string-similarity'
3
3
 
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
- TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
6
+ TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
7
7
  TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
@@ -30,13 +30,17 @@ class TextAlignment::AnchorFinder
30
30
  def get_next_anchor
31
31
  # find the position of an anchor ngram in s1 and s2
32
32
  while @beg_s1 < (@s1.length - @size_ngram)
33
+ if [' ', "\n", "\t"].include? @s1[@beg_s1]
34
+ @beg_s1 += 1
35
+ next
36
+ end
33
37
  anchor = @s1[@beg_s1, @size_ngram]
34
38
 
35
- search_position = 0
36
- # search_position = @end_s2_prev
39
+ # search_position = 0
40
+ search_position = @end_s2_prev
37
41
  while @beg_s2 = @s2.index(anchor, search_position)
38
42
  # if both the beginning points are sufficiently close to the end points of the last match
39
- break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
43
+ break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
40
44
 
41
45
  left_window_s1, left_window_s2 = get_left_windows
42
46
  break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
@@ -57,9 +61,10 @@ class TextAlignment::AnchorFinder
57
61
  # extend the block
58
62
  b1 = @beg_s1
59
63
  b2 = @beg_s2
60
- while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
64
+ while b1 >= @end_s1_prev && b2 >= @end_s2_prev && @s1[b1] == @s2[b2]
61
65
  b1 -= 1; b2 -= 1
62
66
  end
67
+
63
68
  b1 += 1; b2 += 1
64
69
 
65
70
  e1 = @beg_s1 + @size_ngram
@@ -40,6 +40,9 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
43
46
  # mblocks.each do |b|
44
47
  # p [b[:source], b[:target]]
45
48
  # puts "---"
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
78
81
 
79
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
80
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
81
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
82
87
  if alignment.similarity < 0.6
83
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -113,24 +118,11 @@ class TextAlignment::TextAlignment
113
118
  end
114
119
 
115
120
  # Final step
116
- if mblocks[-1][:source][:end] < str1.length
117
- b1 = mblocks[-1][:source][:end]
118
- b2 = mblocks[-1][:target][:end]
119
-
120
- if mblocks[-1][:target][:end] < str2.length
121
-
122
- else
123
- e1 = str1.length
124
- e2 = str2.length
125
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
126
- end
127
- end
128
-
129
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
130
122
  b1 = mblocks[-1][:source][:end]
131
123
  b2 = mblocks[-1][:target][:end]
132
- _str1 = str1[b1 ... -1]
133
- _str2 = str2[b2 ... -1]
124
+ _str1 = str1[b1 ... str1.length]
125
+ _str2 = str2[b2 ... str2.length]
134
126
 
135
127
  unless _str1.strip.empty?
136
128
  if _str2.strip.empty?
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.17'
2
+ VERSION = '0.3.22'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.17
4
+ version: 0.3.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-18 00:00:00.000000000 Z
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary