text_alignment 0.3.13 → 0.3.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +34 -1
- data/lib/text_alignment/text_alignment.rb +7 -3
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f6f98465bb47a2b241dda72c8532530f5c7fdf4de49a403366bd08c256b7ff0e
|
4
|
+
data.tar.gz: 44a6c920f8f05ab3ee29a0b9fe4de38e2f6fac2386838625b77d99486189ebf0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17f038d6d7366b8223cdd66b5ef9f3d79c8ecc39f432ac15dbfd0f3311e1197bc9c40c5cbd38a69d5778278405dcd100bc18187870ee563a7e5999246845b049
|
7
|
+
data.tar.gz: f0ded392d47821bc99c640700955686f14cc9550a13b3b8141af2af7f88f79400a3de6632f2bc3223c9e0dc82311d461de84a5ffa16aff443394b3c76540a74c
|
data/bin/align_annotations
CHANGED
@@ -103,7 +103,33 @@ target_annotations = if source_annotations.class == Array
|
|
103
103
|
else
|
104
104
|
alignment = TextAlignment::TextAlignment.new(source_annotations[:text], target_text)
|
105
105
|
|
106
|
-
|
106
|
+
# verification
|
107
|
+
source_text = source_annotations[:text]
|
108
|
+
puts "=====BEGIN"
|
109
|
+
(0 ... source_text.length).each do |p|
|
110
|
+
t = alignment.transform_begin_position(p)
|
111
|
+
if t.nil?
|
112
|
+
print source_text[p]
|
113
|
+
else
|
114
|
+
print '.'
|
115
|
+
end
|
116
|
+
end
|
117
|
+
puts
|
118
|
+
puts "=====END"
|
119
|
+
|
120
|
+
puts "=====BEGIN"
|
121
|
+
(0 .. source_text.length).each do |p|
|
122
|
+
t = alignment.transform_end_position(p)
|
123
|
+
if t.nil?
|
124
|
+
print source_text[p]
|
125
|
+
else
|
126
|
+
print '.'
|
127
|
+
end
|
128
|
+
end
|
129
|
+
puts
|
130
|
+
puts "=====END"
|
131
|
+
|
132
|
+
# pp alignment
|
107
133
|
|
108
134
|
# alignment.block_alignments.each do |a|
|
109
135
|
# if a[:alignment].nil? || a[:alignment] == :empty
|
@@ -121,6 +147,13 @@ else
|
|
121
147
|
# end
|
122
148
|
# exit
|
123
149
|
|
150
|
+
# verification of source denotations
|
151
|
+
puts "[Invalid source denotations]"
|
152
|
+
source_annotations[:denotations] do |d|
|
153
|
+
p d unless d[:span][:begin] && d[:span][:end] && d[:span][:begin] < d[:span][:end] && d[:span][:begin] >= 0 && d[:span][:end] < source_text.length
|
154
|
+
end
|
155
|
+
puts "====="
|
156
|
+
|
124
157
|
denotations = alignment.transform_hdenotations(source_annotations[:denotations])
|
125
158
|
lost_annotations += alignment.lost_annotations if alignment.lost_annotations
|
126
159
|
|
data/lib/text_alignment/text_alignment.rb
CHANGED
@@ -6,6 +6,8 @@ module TextAlignment; end unless defined? TextAlignment
|
|
6
6
|
|
7
7
|
TextAlignment::SIGNATURE_NGRAM = 7 unless defined? TextAlignment::SIGNATURE_NGRAM
|
8
8
|
TextAlignment::BUFFER_RATE = 0.1 unless defined? TextAlignment::BUFFER_RATE
|
9
|
+
TextAlignment::BUFFER_MIN = 10 unless defined? TextAlignment::BUFFER_MIN
|
10
|
+
|
9
11
|
|
10
12
|
class TextAlignment::TextAlignment
|
11
13
|
attr_reader :block_alignments
|
@@ -70,7 +72,7 @@ class TextAlignment::TextAlignment
|
|
70
72
|
@block_alignments << {source:{begin:0, end:e1}, target:{begin:0, end:e2}, alignment: :empty}
|
71
73
|
else
|
72
74
|
len_min = [_str1.length, _str2.length].min
|
73
|
-
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i
|
75
|
+
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
74
76
|
b1 = _str1.length < len_buffer ? 0 : e1 - len_buffer
|
75
77
|
b2 = _str2.length < len_buffer ? 0 : e2 - len_buffer
|
76
78
|
|
@@ -135,9 +137,11 @@ class TextAlignment::TextAlignment
|
|
135
137
|
@block_alignments << {source:{begin:b1, end:str1.length}, target:{begin:b2, end:str2.length}, alignment: :empty}
|
136
138
|
else
|
137
139
|
len_min = [_str1.length, _str2.length].min
|
138
|
-
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i
|
140
|
+
len_buffer = (len_min * (1 + TextAlignment::BUFFER_RATE)).to_i + TextAlignment::BUFFER_MIN
|
139
141
|
e1 = _str1.length < len_buffer ? str1.length : b1 + len_buffer
|
140
|
-
e2 = _str2.length < len_buffer ?
|
142
|
+
e2 = _str2.length < len_buffer ? str2.length : b2 + len_buffer
|
143
|
+
_str1 = str1[b1 ... e1]
|
144
|
+
_str2 = str2[b2 ... e2]
|
141
145
|
|
142
146
|
alignment = TextAlignment::MixedAlignment.new(_str1.downcase, _str2.downcase, mappings)
|
143
147
|
if alignment.similarity < 0.6
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13
|
4
|
+
version: 0.3.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|