text_alignment 0.11.5 → 0.11.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/align_annotations +32 -9
- data/lib/text_alignment/char_mapping.rb +1 -1
- data/lib/text_alignment/mixed_alignment.rb +15 -4
- data/lib/text_alignment/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ed6071b0293e9b7fa86acf4a737c80c9d784f453d3fb77992e3d9f1acc02bbe
|
4
|
+
data.tar.gz: 7a57b7afbe21061d9aac2f96cd9f7a3c83ff2f01b6c3973905f7681a40463287
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141158c2f6b80975bacf68babc713b06d809f02b96cfd7accdecb00c6b4c362b9d130e29820074be7b89ac53c7cc93f850ec3d7ea9796eaad5021d441528e82b
|
7
|
+
data.tar.gz: 8bc4cc9ca9becc1c5638b6501268d33dd7700e8369d7c20c1ec6fd9ef11e9fd73e6a1354cb37376af250c8462e945cccf08417ba6d258f08660c53eb418e4658
|
data/bin/align_annotations
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
require 'text_alignment'
|
3
3
|
require 'json'
|
4
4
|
require 'pp'
|
5
|
+
require 'optparse'
|
5
6
|
|
6
7
|
def read_annotations(filename)
|
7
8
|
case File.extname(filename)
|
@@ -108,24 +109,46 @@ def align_mannotations(source_annotations, reference_text, alignment, debug = fa
|
|
108
109
|
end
|
109
110
|
|
110
111
|
|
112
|
+
## Options
|
113
|
+
overlap_p = false
|
114
|
+
debug_p = false
|
115
|
+
|
116
|
+
## command line option processing
|
117
|
+
require 'optparse'
|
118
|
+
optparse = OptionParser.new do |opts|
|
119
|
+
opts.banner = "Usage: align_annotations [options] target_annotations(.json|.txt) reference_text(.json|.txt)"
|
120
|
+
|
121
|
+
opts.on('-o', '--overlap', 'tells it to assume there may be overlapping texts.') do
|
122
|
+
overlap_p = true
|
123
|
+
end
|
124
|
+
|
125
|
+
opts.on('-d', '--debug', 'tells it to show debugging information.') do
|
126
|
+
debug_p = true
|
127
|
+
end
|
128
|
+
|
129
|
+
opts.on('-h', '--help', 'displays this screen.') do
|
130
|
+
puts opts
|
131
|
+
exit
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
optparse.parse!
|
136
|
+
|
111
137
|
unless ARGV.length == 2
|
112
|
-
|
113
|
-
exit
|
138
|
+
puts optparse.help
|
139
|
+
exit 1
|
114
140
|
end
|
115
141
|
|
116
142
|
source_annotations = read_annotations(ARGV[0])
|
117
143
|
reference_text = read_text(ARGV[1])
|
118
144
|
|
119
|
-
alignment = TextAlignment::TextAlignment.new(reference_text,
|
145
|
+
alignment = TextAlignment::TextAlignment.new(reference_text, !overlap_p)
|
120
146
|
|
121
147
|
target_annotations = if source_annotations.class == Array
|
122
|
-
|
123
|
-
align_mannotations(source_annotations, reference_text, alignment, false)
|
148
|
+
align_mannotations(source_annotations, reference_text, alignment, debug_p)
|
124
149
|
else
|
125
|
-
|
126
|
-
denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, false)
|
150
|
+
denotations = align_denotations(source_annotations[:denotations], source_annotations[:text], alignment, debug_p)
|
127
151
|
source_annotations.merge({text:reference_text, denotations:denotations})
|
128
152
|
end
|
129
153
|
|
130
|
-
|
131
|
-
# puts target_annotations.to_json
|
154
|
+
puts target_annotations.to_json
|
@@ -108,7 +108,7 @@ class TextAlignment::CharMapping
|
|
108
108
|
def enmap_text(_text, char_mapping)
|
109
109
|
text = _text.dup
|
110
110
|
|
111
|
-
# To execute the single letter mapping
|
111
|
+
# To execute the single letter mapping replacement
|
112
112
|
char_mapping.each do |one, long|
|
113
113
|
text.gsub!(one, long) if long.length == 1
|
114
114
|
end
|
@@ -147,13 +147,24 @@ class TextAlignment::MixedAlignment
|
|
147
147
|
# recoverbility
|
148
148
|
count_nws = sdiff.count{|d| d.old_element =~ /\S/}
|
149
149
|
count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/}
|
150
|
-
|
151
150
|
coverage = count_nws_match.to_f / count_nws
|
152
151
|
|
153
152
|
# fragmentation rate
|
154
|
-
|
155
|
-
|
156
|
-
|
153
|
+
frag_str = sdiff.collect do |d|
|
154
|
+
case d.action
|
155
|
+
when '='
|
156
|
+
'='
|
157
|
+
when '-'
|
158
|
+
''
|
159
|
+
when '+'
|
160
|
+
(d.new_element =~ /\S/) ? '+' : ''
|
161
|
+
else
|
162
|
+
''
|
163
|
+
end
|
164
|
+
end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '')
|
165
|
+
|
166
|
+
count_frag = frag_str.scan(/=+/).count
|
167
|
+
rate_frag = 1.0 / count_frag
|
157
168
|
|
158
169
|
similarity = coverage * rate_frag
|
159
170
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_alignment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.5
|
4
|
+
version: 0.11.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-dictionary
|