text_alignment 0.3.16 → 0.3.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +2 -3
- data/lib/text_alignment/anchor_finder.rb +4 -4
- data/lib/text_alignment/text_alignment.rb +22 -30
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65e1d9b45ff59ac0a233b7656d2aca99d7e4e1051b1a03a0c7726521d4f2b280
|
4
|
+
data.tar.gz: 710a3b68c5263f26572727e6e9591ebd5fdb095af4633bd5037c61eae0bb5cb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 598df22e41bbbe0a84b6e1a6a4e631ab0d8166810afd652086595feecbf0808a886685f42e5466626cbb1d6950dd9f1181be776b9938d6174dc7735c3ace24cd
|
7
|
+
data.tar.gz: f7dedfb7e64919129f816fbba24dbd1c2e2a056c242a0865915b8a611f594399b17d051d004a846796bba1c2e89c6fb2f17116cd118ca6217cf1a5dff4f6d4d8
|
data/bin/align_annotations
CHANGED
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
|
|
103
103
|
else
|
104
104
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
105
|
|
106
|
+
pp alignment
|
107
|
+
|
106
108
|
# verification
|
107
109
|
source_text = source_annotations[:text]
|
108
110
|
puts "=====BEGIN"
|
@@ -129,9 +131,6 @@ else
|
|
129
131
|
puts
|
130
132
|
puts "=====END"
|
131
133
|
|
132
|
-
pp alignment
|
133
|
-
|
134
|
-
exit
|
135
134
|
# alignment.block_alignments.each do |a|
|
136
135
|
# if a[:alignment].nil? || a[:alignment] == :empty
|
137
136
|
# # p [a[:source], a[:target]]
|
@@ -3,7 +3,7 @@ require 'string-similarity'
|
|
3
3
|
|
4
4
|
module TextAlignment; end unless defined? TextAlignment
|
5
5
|
|
6
|
-
TextAlignment::SIZE_NGRAM =
|
6
|
+
TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
|
7
7
|
TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
|
8
8
|
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
|
9
9
|
|
@@ -32,11 +32,11 @@ class TextAlignment::AnchorFinder
|
|
32
32
|
while @beg_s1 < (@s1.length - @size_ngram)
|
33
33
|
anchor = @s1[@beg_s1, @size_ngram]
|
34
34
|
|
35
|
-
search_position = 0
|
36
|
-
|
35
|
+
# search_position = 0
|
36
|
+
search_position = @end_s2_prev
|
37
37
|
while @beg_s2 = @s2.index(anchor, search_position)
|
38
38
|
# if both the begining points are sufficiantly close to the end points of the last match
|
39
|
-
break if @
|
39
|
+
break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
|
40
40
|
|
41
41
|
left_window_s1, left_window_s2 = get_left_windows
|
42
42
|
break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
|
@@ -40,17 +40,20 @@ class TextAlignment::TextAlignment
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
52
|
-
puts "
|
53
|
-
puts
|
43
|
+
# pp mblocks
|
44
|
+
# puts "-----"
|
45
|
+
# puts
|
46
|
+
# mblocks.each do |b|
|
47
|
+
# p [b[:source], b[:target]]
|
48
|
+
# puts "---"
|
49
|
+
# puts str1[b[:source][:begin] ... b[:source][:end]]
|
50
|
+
# puts "---"
|
51
|
+
# puts str2[b[:target][:begin] ... b[:target][:end]]
|
52
|
+
# puts "====="
|
53
|
+
# puts
|
54
|
+
# end
|
55
|
+
# puts "-=-=-=-=-"
|
56
|
+
# puts
|
54
57
|
|
55
58
|
## To find block alignments
|
56
59
|
@block_alignments = []
|
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
|
|
78
81
|
|
79
82
|
@block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
|
80
83
|
|
84
|
+
_str1 = str1[b1 ... e1]
|
85
|
+
_str2 = str2[b2 ... e2]
|
81
86
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
82
87
|
if alignment.similarity < 0.6
|
83
88
|
@block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
@@ -113,24 +118,11 @@ class TextAlignment::TextAlignment
|
|
113
118
|
end
|
114
119
|
|
115
120
|
# Final step
|
116
|
-
if mblocks[-1][:source][:end] < str1.length
|
117
|
-
b1 = mblocks[-1][:source][:end]
|
118
|
-
b2 = mblocks[-1][:target][:end]
|
119
|
-
|
120
|
-
if mblocks[-1][:target][:end] < str2.length
|
121
|
-
|
122
|
-
else
|
123
|
-
e1 = str1.length
|
124
|
-
e2 = str2.length
|
125
|
-
@block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
121
|
if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
|
130
122
|
b1 = mblocks[-1][:source][:end]
|
131
123
|
b2 = mblocks[-1][:target][:end]
|
132
|
-
_str1 = str1[b1 ...
|
133
|
-
_str2 = str2[b2 ...
|
124
|
+
_str1 = str1[b1 ... str1.length]
|
125
|
+
_str2 = str2[b2 ... str2.length]
|
134
126
|
|
135
127
|
unless _str1.strip.empty?
|
136
128
|
if _str2.strip.empty?
|
@@ -174,9 +166,8 @@ class TextAlignment::TextAlignment
|
|
174
166
|
nil
|
175
167
|
end
|
176
168
|
else
|
177
|
-
|
178
|
-
|
179
|
-
block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
|
169
|
+
r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
|
170
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
180
171
|
end
|
181
172
|
end
|
182
173
|
|
@@ -194,7 +185,8 @@ class TextAlignment::TextAlignment
|
|
194
185
|
nil
|
195
186
|
end
|
196
187
|
else
|
197
|
-
block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
188
|
+
r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
189
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
198
190
|
end
|
199
191
|
end
|
200
192
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|