text_alignment 0.3.15 → 0.3.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8252929e7b74251db493ad991332d0da8a92f35441e2a442d05b6fb29139d657
4
- data.tar.gz: c9e41127fb231e4db2843696c1029b35711449c5bf5104158da301d03b817131
3
+ metadata.gz: 179976ef6ac286a34343f99eef5cf8ee3d26997c1a8e1c8e9348793773ac044a
4
+ data.tar.gz: e15a57b2460a21d2607e3e4775ad32c5a760d9f00067c898b966e21777c241d4
5
5
  SHA512:
6
- metadata.gz: 1fc8da7324d71cf25edbec9765ab512928323079472736ea4e294abb12dfafc87f55d71cb49c371470811775bd489d3c91cce4a787b99faa305f2f326dc80c77
7
- data.tar.gz: f694c99216b59dd693a6acdfffc727fe74b5c189b4b9583b31fb7e6394319a3176de76237a142dac3770bcd1fbbc467d5d1e97a7225ed993a21246a66de8b2ec
6
+ metadata.gz: b3742565325c8ce8b4ce35093b4524b1d9182b51e332f50d9376ce2c22af8918a168b1fcc5764e4313e819c546684a3fc9411d8ee39607d95557924975bd4143
7
+ data.tar.gz: 6f68f68799075990fce62f4454c2ef1f1ef4ff260eac44a5332744a9c21605ac14eb35c9d6d24bedb31681f0995222699d8018727f7e70a45c11076df1c82212
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
103
103
  else
104
104
  alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
105
105
 
106
+ pp alignment
107
+
106
108
  # verification
107
109
  source_text = source_annotations[:text]
108
110
  puts "=====BEGIN"
@@ -129,9 +131,6 @@ else
129
131
  puts
130
132
  puts "=====END"
131
133
 
132
- pp alignment
133
-
134
- exit
135
134
  # alignment.block_alignments.each do |a|
136
135
  # if a[:alignment].nil? || a[:alignment] == :empty
137
136
  # # p [a[:source], a[:target]]
@@ -4,7 +4,7 @@ require 'string-similarity'
4
4
  module TextAlignment; end unless defined? TextAlignment
5
5
 
6
6
  TextAlignment::SIZE_NGRAM = 10 unless defined? TextAlignment::SIZE_NGRAM
7
- TextAlignment::SIZE_WINDOW = 30 unless defined? TextAlignment::SIZE_WINDOW
7
+ TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
8
8
  TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
9
9
 
10
10
  class TextAlignment::AnchorFinder
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
23
23
 
24
24
  # current position in s1
25
25
  @beg_s1 = 0
26
+ @end_s1_prev = 0
26
27
  @end_s2_prev = 0
27
28
  end
28
29
 
@@ -31,8 +32,8 @@ class TextAlignment::AnchorFinder
31
32
  while @beg_s1 < (@s1.length - @size_ngram)
32
33
  anchor = @s1[@beg_s1, @size_ngram]
33
34
 
34
- search_position = 0
35
- # search_position = @end_s2_prev
35
+ # search_position = 0
36
+ search_position = @end_s2_prev
36
37
  while @beg_s2 = @s2.index(anchor, search_position)
37
38
  # if both the beginning points are sufficiently close to the end points of the last match
38
39
  break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
@@ -56,7 +57,7 @@ class TextAlignment::AnchorFinder
56
57
  # extend the block
57
58
  b1 = @beg_s1
58
59
  b2 = @beg_s2
59
- while b1 > -1 && b2 > -1 && @s1[b1] == @s2[b2]
60
+ while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
60
61
  b1 -= 1; b2 -= 1
61
62
  end
62
63
  b1 += 1; b2 += 1
@@ -40,6 +40,9 @@ class TextAlignment::TextAlignment
40
40
  end
41
41
  end
42
42
 
43
+ # pp mblocks
44
+ # puts "-----"
45
+ # puts
43
46
  # mblocks.each do |b|
44
47
  # p [b[:source], b[:target]]
45
48
  # puts "---"
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
78
81
 
79
82
  @block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
80
83
 
84
+ _str1 = str1[b1 ... e1]
85
+ _str2 = str2[b2 ... e2]
81
86
  alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
82
87
  if alignment.similarity < 0.6
83
88
  @block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
@@ -113,19 +118,6 @@ class TextAlignment::TextAlignment
113
118
  end
114
119
 
115
120
  # Final step
116
- if mblocks[-1][:source][:end] < str1.length
117
- b1 = mblocks[-1][:source][:end]
118
- b2 = mblocks[-1][:target][:end]
119
-
120
- if mblocks[-1][:target][:end] < str2.length
121
-
122
- else
123
- e1 = str1.length
124
- e2 = str2.length
125
- @block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
126
- end
127
- end
128
-
129
121
  if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
130
122
  b1 = mblocks[-1][:source][:end]
131
123
  b2 = mblocks[-1][:target][:end]
@@ -174,7 +166,8 @@ class TextAlignment::TextAlignment
174
166
  nil
175
167
  end
176
168
  else
177
- block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
169
+ r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
170
+ r.nil? ? nil : r + block_alignment[:target][:begin]
178
171
  end
179
172
  end
180
173
 
@@ -192,7 +185,8 @@ class TextAlignment::TextAlignment
192
185
  nil
193
186
  end
194
187
  else
195
- block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
188
+ r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
189
+ r.nil? ? nil : r + block_alignment[:target][:begin]
196
190
  end
197
191
  end
198
192
 
@@ -1,3 +1,3 @@
1
1
  class TextAlignment
2
- VERSION = '0.3.15'
2
+ VERSION = '0.3.20'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_alignment
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.15
4
+ version: 0.3.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-17 00:00:00.000000000 Z
11
+ date: 2020-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-dictionary