text_alignment 0.3.16 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/align_annotations +2 -3
- data/lib/text_alignment/anchor_finder.rb +4 -4
- data/lib/text_alignment/text_alignment.rb +22 -30
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65e1d9b45ff59ac0a233b7656d2aca99d7e4e1051b1a03a0c7726521d4f2b280
|
4
|
+
data.tar.gz: 710a3b68c5263f26572727e6e9591ebd5fdb095af4633bd5037c61eae0bb5cb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 598df22e41bbbe0a84b6e1a6a4e631ab0d8166810afd652086595feecbf0808a886685f42e5466626cbb1d6950dd9f1181be776b9938d6174dc7735c3ace24cd
|
7
|
+
data.tar.gz: f7dedfb7e64919129f816fbba24dbd1c2e2a056c242a0865915b8a611f594399b17d051d004a846796bba1c2e89c6fb2f17116cd118ca6217cf1a5dff4f6d4d8
|
data/bin/align_annotations
CHANGED
@@ -103,6 +103,8 @@ target_annotations = if source_annotations.class == Array
|
|
103
103
|
else
|
104
104
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
105
|
|
106
|
+
pp alignment
|
107
|
+
|
106
108
|
# verification
|
107
109
|
source_text = source_annotations[:text]
|
108
110
|
puts "=====BEGIN"
|
@@ -129,9 +131,6 @@ else
|
|
129
131
|
puts
|
130
132
|
puts "=====END"
|
131
133
|
|
132
|
-
pp alignment
|
133
|
-
|
134
|
-
exit
|
135
134
|
# alignment.block_alignments.each do |a|
|
136
135
|
# if a[:alignment].nil? || a[:alignment] == :empty
|
137
136
|
# # p [a[:source], a[:target]]
|
@@ -3,7 +3,7 @@ require 'string-similarity'
|
|
3
3
|
|
4
4
|
module TextAlignment; end unless defined? TextAlignment
|
5
5
|
|
6
|
-
TextAlignment::SIZE_NGRAM =
|
6
|
+
TextAlignment::SIZE_NGRAM = 8 unless defined? TextAlignment::SIZE_NGRAM
|
7
7
|
TextAlignment::SIZE_WINDOW = 40 unless defined? TextAlignment::SIZE_WINDOW
|
8
8
|
TextAlignment::TEXT_SIMILARITY_TRESHOLD = 0.8 unless defined? TextAlignment::TEXT_SIMILARITY_TRESHOLD
|
9
9
|
|
@@ -32,11 +32,11 @@ class TextAlignment::AnchorFinder
|
|
32
32
|
while @beg_s1 < (@s1.length - @size_ngram)
|
33
33
|
anchor = @s1[@beg_s1, @size_ngram]
|
34
34
|
|
35
|
-
search_position = 0
|
36
|
-
|
35
|
+
# search_position = 0
|
36
|
+
search_position = @end_s2_prev
|
37
37
|
while @beg_s2 = @s2.index(anchor, search_position)
|
38
38
|
# if both the begining points are sufficiantly close to the end points of the last match
|
39
|
-
break if @
|
39
|
+
break if @beg_s1 > 0 && @beg_s2 > 0 && (@beg_s1 - @end_s1_prev < 5) && (@beg_s2 >= @end_s2_prev) && (@beg_s2 - @end_s2_prev < 5)
|
40
40
|
|
41
41
|
left_window_s1, left_window_s2 = get_left_windows
|
42
42
|
break if left_window_s1 && (text_similarity(left_window_s1, left_window_s2) > TextAlignment::TEXT_SIMILARITY_TRESHOLD)
|
@@ -40,17 +40,20 @@ class TextAlignment::TextAlignment
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
52
|
-
puts "
|
53
|
-
puts
|
43
|
+
# pp mblocks
|
44
|
+
# puts "-----"
|
45
|
+
# puts
|
46
|
+
# mblocks.each do |b|
|
47
|
+
# p [b[:source], b[:target]]
|
48
|
+
# puts "---"
|
49
|
+
# puts str1[b[:source][:begin] ... b[:source][:end]]
|
50
|
+
# puts "---"
|
51
|
+
# puts str2[b[:target][:begin] ... b[:target][:end]]
|
52
|
+
# puts "====="
|
53
|
+
# puts
|
54
|
+
# end
|
55
|
+
# puts "-=-=-=-=-"
|
56
|
+
# puts
|
54
57
|
|
55
58
|
## To find block alignments
|
56
59
|
@block_alignments = []
|
@@ -78,6 +81,8 @@ class TextAlignment::TextAlignment
|
|
78
81
|
|
79
82
|
@block_alignments << {source:{begin:0, end:b1}, target:{begin:0, end:b2}, alignment: :empty} if b1 > 0
|
80
83
|
|
84
|
+
_str1 = str1[b1 ... e1]
|
85
|
+
_str2 = str2[b2 ... e2]
|
81
86
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
82
87
|
if alignment.similarity < 0.6
|
83
88
|
@block_alignments << {source:{begin:b1, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
@@ -113,24 +118,11 @@ class TextAlignment::TextAlignment
|
|
113
118
|
end
|
114
119
|
|
115
120
|
# Final step
|
116
|
-
if mblocks[-1][:source][:end] < str1.length
|
117
|
-
b1 = mblocks[-1][:source][:end]
|
118
|
-
b2 = mblocks[-1][:target][:end]
|
119
|
-
|
120
|
-
if mblocks[-1][:target][:end] < str2.length
|
121
|
-
|
122
|
-
else
|
123
|
-
e1 = str1.length
|
124
|
-
e2 = str2.length
|
125
|
-
@block_alignments << {source:{begin:b1, end:e1}, target:{begin:b2, end:e2}, alignment: :empty}
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
121
|
if mblocks[-1][:source][:end] < str1.length && mblocks[-1][:target][:end] < str2.length
|
130
122
|
b1 = mblocks[-1][:source][:end]
|
131
123
|
b2 = mblocks[-1][:target][:end]
|
132
|
-
_str1 = str1[b1 ...
|
133
|
-
_str2 = str2[b2 ...
|
124
|
+
_str1 = str1[b1 ... str1.length]
|
125
|
+
_str2 = str2[b2 ... str2.length]
|
134
126
|
|
135
127
|
unless _str1.strip.empty?
|
136
128
|
if _str2.strip.empty?
|
@@ -174,9 +166,8 @@ class TextAlignment::TextAlignment
|
|
174
166
|
nil
|
175
167
|
end
|
176
168
|
else
|
177
|
-
|
178
|
-
|
179
|
-
block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin]) + block_alignment[:target][:begin]
|
169
|
+
r = block_alignment[:alignment].transform_begin_position(begin_position - block_alignment[:source][:begin])
|
170
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
180
171
|
end
|
181
172
|
end
|
182
173
|
|
@@ -194,7 +185,8 @@ class TextAlignment::TextAlignment
|
|
194
185
|
nil
|
195
186
|
end
|
196
187
|
else
|
197
|
-
block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
188
|
+
r = block_alignment[:alignment].transform_end_position(end_position - block_alignment[:source][:begin])
|
189
|
+
r.nil? ? nil : r + block_alignment[:target][:begin]
|
198
190
|
end
|
199
191
|
end
|
200
192
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|