text_alignment 0.3.14 → 0.3.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +10 -2
- data/lib/text_alignment/anchor_finder.rb +6 -5
- data/lib/text_alignment/text_alignment.rb +12 -4
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea57d01970fdb56a95a7929803949a553965692fb3f4748eec72fe026f9a79cf
|
4
|
+
data.tar.gz: 96397baa91646b3eb05a346ff699930b6dacf7d38075273b64ce7916f32d6275
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d1e7650c35d9bae35a7f1dc2948dbf97fa8f71a86f1f83c5dda9cb64b7179e96ae219db88951e65c2988f078900d960784c4c74489a074654ed893c408be97f
|
7
|
+
data.tar.gz: 49afaec2b6332dc4038c1f0d0e930bf20ce61a6d4933ff3758bbbfea3e05cf347277aefc341d9c3b3d1f8a4692cec24b901528c2875875bacecc1caa7cf9159c
|
data/bin/align_annotations
CHANGED
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
|
|
103
103
|
else
|
104
104
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
105
|
|
106
|
+
pp alignment
|
107
|
+
|
106
108
|
# verification
|
107
109
|
source_text = source_annotations[:text]
|
108
110
|
puts "=====BEGIN"
|
@@ -129,8 +131,6 @@ else
|
|
129
131
|
puts
|
130
132
|
puts "=====END"
|
131
133
|
|
132
|
-
# pp alignment
|
133
|
-
|
134
134
|
# alignment.block_alignments.each do |a|
|
135
135
|
# if a[:alignment].nil? || a[:alignment] == :empty
|
136
136
|
# # p [a[:source], a[:target]]
|
@@ -153,8 +153,16 @@ else
|
|
153
153
|
p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
|
154
154
|
end
|
155
155
|
puts "====="
|
156
|
+
puts
|
156
157
|
|
157
158
|
denotations = alignment.transform_hdenotations(source_annotations[:denotations])
|
159
|
+
puts "[Invalid transformation]"
|
160
|
+
denotations.each do |d|
|
161
|
+
p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < target_text.length
|
162
|
+
end
|
163
|
+
puts "====="
|
164
|
+
puts
|
165
|
+
|
158
166
|
lost_annotations += alignment.lost_annotations if alignment.lost_annotations
|
159
167
|
|
160
168
|
source_annotations.merge({text:target_text, denotations:denotations})
|
data/lib/text_alignment/anchor_finder.rb
CHANGED
@@ -3,8 +3,8 @@ require 'string-similarity'
|
|
3
3
|
|
4
4
|
module TextAlignment; end unless defined? TextAlignment
|
5
5
|
|
6
|
-
TextAlignment::SIZE_NGRAM =
|
7
|
-
TextAlignment::SIZE_WINDOW =
|
6
|
+
TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
|
7
|
+
TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
|
8
8
|
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
|
9
9
|
|
10
10
|
class TextAlignment::AnchorFinder
|
@@ -23,6 +23,7 @@ class TextAlignment::AnchorFinder
|
|
23
23
|
|
24
24
|
# current position in s1
|
25
25
|
@beg_s1 = 0
|
26
|
+
@end_s1_prev = 0
|
26
27
|
@end_s2_prev = 0
|
27
28
|
end
|
28
29
|
|
@@ -31,8 +32,8 @@ class TextAlignment::AnchorFinder
|
|
31
32
|
while @beg_s1 < (@s1.length - @size_ngram)
|
32
33
|
anchor = @s1[@beg_s1, @size_ngram]
|
33
34
|
|
34
|
-
search_position = 0
|
35
|
-
|
35
|
+
# search_position = 0
|
36
|
+
search_position = @end_s2_prev
|
36
37
|
while @beg_s2 = @s2.index(anchor, search_position)
|
37
38
|
# if both the begining points are sufficiantly close to the end points of the last match
|
38
39
|
break if @end_s1_prev && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
|
@@ -56,7 +57,7 @@ class TextAlignment::AnchorFinder
|
|
56
57
|
# extend the block
|
57
58
|
b1 = @beg_s1
|
58
59
|
b2 = @beg_s2
|
59
|
-
while b1
|
60
|
+
while b1 >= @end_s1_prev && b2 > -1 && @s1[b1] == @s2[b2]
|
60
61
|
b1 -= 1; b2 -= 1
|
61
62
|
end
|
62
63
|
b1 += 1; b2 += 1
|
data/lib/text_alignment/text_alignment.rb
CHANGED
@@ -40,6 +40,10 @@ class TextAlignment::TextAlignment
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
+
pp mblocks
|
44
|
+
puts "-----"
|
45
|
+
puts
|
46
|
+
|
43
47
|
# mblocks.each do |b|
|
44
48
|
# p [b[:source], b[:target]]
|
45
49
|
# puts "---"
|
@@ -170,10 +174,12 @@ class TextAlignment::TextAlignment
|
|
170
174
|
if begin_position == block_alignment[:source][:begin]
|
171
175
|
block_alignment[:target][:begin]
|
172
176
|
else
|
173
|
-
raise "lost annotation"
|
177
|
+
# raise "lost annotation"
|
178
|
+
nil
|
174
179
|
end
|
175
180
|
else
|
176
|
-
block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
|
181
|
+
r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
|
182
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
177
183
|
end
|
178
184
|
end
|
179
185
|
|
@@ -187,10 +193,12 @@ class TextAlignment::TextAlignment
|
|
187
193
|
if end_position == block_alignment[:source][:end]
|
188
194
|
block_alignment[:target][:end]
|
189
195
|
else
|
190
|
-
raise "lost annotation"
|
196
|
+
# raise "lost annotation"
|
197
|
+
nil
|
191
198
|
end
|
192
199
|
else
|
193
|
-
block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
200
|
+
r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
201
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
194
202
|
end
|
195
203
|
end
|
196
204
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.14
|
4
|
+
version: 0.3.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|