pubannotation_evaluator 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/pubannotation-eval +36 -5
- data/lib/pubannotation_evaluator/pubannotation_evaluator.rb +65 -35
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f492f0c865db6673ec836d33c24b4d22f73f26e0f957ced3e679cd7b8fe9ae5
|
4
|
+
data.tar.gz: 01be9be7175842fdb5f217a2870b2734b58c912520c593dd990c2d6783a85fbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffaf46d1e897243a6ff7e87e14797a36e20d6847519797be33ba1a4074cce46fb167de0e942e3288ce5e5f6cbf9f859edf2db76b9fb0fc62693b50397b0bbc9b
|
7
|
+
data.tar.gz: 0cef53db1ee03d022f0c182e6e1babcec850ec86d18d636ebd8f42d1491732d2aead1279ba81cd6d7b265193a7019f832990b5229ba35008392d73868ca32869
|
data/bin/pubannotation-eval
CHANGED
@@ -3,15 +3,40 @@ require 'pubannotation_evaluator'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
rdir = nil
|
6
|
+
verbose = false
|
7
|
+
|
8
|
+
soft_match_characters = PubannotationEvaluator::SOFT_MATCH_CHARACTERS
|
9
|
+
soft_match_words = PubannotationEvaluator::SOFT_MATCH_WORDS
|
10
|
+
denotation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
|
11
|
+
relation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
|
6
12
|
|
7
13
|
## command line option processing
|
8
14
|
require 'optparse'
|
9
15
|
optparse = OptionParser.new do |opts|
|
10
16
|
opts.banner = "Usage: pubannotation-eval.rb [options] annotation_file(s)"
|
11
17
|
|
12
|
-
opts.on('-r', '--rdir=
|
18
|
+
opts.on('-r', '--rdir=DIR', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
|
13
19
|
rdir = dir
|
20
|
+
end
|
21
|
+
|
22
|
+
opts.on('-c', '--soft-match-characters=INT', "specifies the number of characters to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_CHARACTERS}).") do |i|
|
23
|
+
soft_match_characters = i.to_i
|
24
|
+
end
|
14
25
|
|
26
|
+
opts.on('-w', '--soft-match-words=INT', "specifies the number of words to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_WORDS}).") do |i|
|
27
|
+
soft_match_words = i.to_i
|
28
|
+
end
|
29
|
+
|
30
|
+
opts.on('-D', '--denotation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
|
31
|
+
denotation_type_match = b
|
32
|
+
end
|
33
|
+
|
34
|
+
opts.on('-R', '--relation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
|
35
|
+
relation_type_match = b
|
36
|
+
end
|
37
|
+
|
38
|
+
opts.on('-v', '--verbose', "tells it to report false positives and false negatives.") do
|
39
|
+
verbose = true
|
15
40
|
end
|
16
41
|
|
17
42
|
opts.on('-h', '--help', 'displays this screen.') do
|
@@ -27,7 +52,7 @@ if ARGV.length == 0 || rdir.nil?
|
|
27
52
|
exit
|
28
53
|
end
|
29
54
|
|
30
|
-
evaluator = PubannotationEvaluator.new
|
55
|
+
evaluator = PubannotationEvaluator.new(soft_match_characters, soft_match_words, denotation_type_match, relation_type_match)
|
31
56
|
|
32
57
|
comparison = ARGV.inject([]) do |col, filepath|
|
33
58
|
if File.extname(filepath) == '.json'
|
@@ -53,6 +78,12 @@ end
|
|
53
78
|
|
54
79
|
evaluation = evaluator.evaluate(comparison)
|
55
80
|
|
56
|
-
|
57
|
-
|
58
|
-
|
81
|
+
if verbose
|
82
|
+
false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
|
83
|
+
evaluation[:false_positives] = false_positives unless false_positives.empty?
|
84
|
+
|
85
|
+
false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
|
86
|
+
evaluation[:false_negatives] = false_negatives unless false_negatives.empty?
|
87
|
+
end
|
88
|
+
|
89
|
+
puts JSON.generate(evaluation)
|
@@ -1,10 +1,21 @@
|
|
1
1
|
class PubannotationEvaluator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
@
|
2
|
+
SOFT_MATCH_CHARACTERS = 20
|
3
|
+
SOFT_MATCH_WORDS = 2
|
4
|
+
EXACT_TYPE_MATCH = 'study_type == reference_type ? 1 : 0'
|
5
|
+
|
6
|
+
# Builds an evaluator configured with the given matching criteria.
#
# soft_match_chatacters:: tolerance, in characters, stored in @soft_match_chatacters.
#                         NOTE: the misspelling ("chatacters") is kept on purpose because
#                         the span-comparison code reads the instance variable under
#                         exactly this name.
# soft_match_words::      tolerance, in words, stored in @soft_match_words.
# denotation_type_match:: criterion for deciding whether two denotation types match.
# relation_type_match::   criterion for deciding whether two relation types match.
#
# Each type-match criterion may be either
# * a String of Ruby source, which becomes the body of a Proc taking
#   (study_type, reference_type), or
# * any object responding to #call (Proc, lambda, Method), which is used as is
#   and is called with the same two arguments.
#
# SECURITY: String criteria are compiled with eval, i.e. they run as arbitrary
# Ruby code with full access to this process. Only pass strings from trusted
# sources; prefer passing a callable when the criterion is built
# programmatically.
def initialize(soft_match_chatacters = SOFT_MATCH_CHARACTERS, soft_match_words = SOFT_MATCH_WORDS, denotation_type_match = EXACT_TYPE_MATCH, relation_type_match = EXACT_TYPE_MATCH)
  @soft_match_chatacters = soft_match_chatacters
  @soft_match_words = soft_match_words

  # Turns a criterion into a callable: callables pass through untouched,
  # strings are wrapped into a two-argument Proc via eval (see SECURITY above).
  to_matcher = lambda do |criterion|
    next criterion if criterion.respond_to?(:call)

    eval <<-HEREDOC
      Proc.new do |study_type, reference_type|
        #{criterion}
      end
    HEREDOC
  end

  @denotation_type_match = to_matcher.call(denotation_type_match)
  @relation_type_match = to_matcher.call(relation_type_match)
end
|
9
20
|
|
10
21
|
# To compare two sets of annotations
|
@@ -45,6 +56,14 @@ class PubannotationEvaluator
|
|
45
56
|
{counts:counts, measures:measures}
|
46
57
|
end
|
47
58
|
|
59
|
+
# Returns the false positives found in a comparison: the entries that carry a
# study annotation (truthy :study) but no reference annotation (nil :reference).
#
# comparison::   an Array of Hashes with :study and :reference keys.
# project_name:: not used by this method; it is optional so that callers do not
#                have to pass a dummy value. Passing it is still accepted.
#
# Returns a new Array; the comparison itself is not modified.
def get_false_positives(comparison, project_name = nil)
  comparison.select { |m| m[:study] && m[:reference].nil? }
end
|
62
|
+
|
63
|
+
# Returns the false negatives found in a comparison: the entries that carry a
# reference annotation (truthy :reference) but no study annotation (nil :study).
#
# comparison::   an Array of Hashes with :study and :reference keys.
# project_name:: not used by this method; it is optional so that callers do not
#                have to pass a dummy value. Passing it is still accepted.
#
# Returns a new Array; the comparison itself is not modified.
def get_false_negatives(comparison, project_name = nil)
  comparison.select { |m| m[:study].nil? && m[:reference] }
end
|
66
|
+
|
48
67
|
private
|
49
68
|
|
50
69
|
def compare_denotations(study_denotations, reference_denotations, text)
|
@@ -61,18 +80,18 @@ class PubannotationEvaluator
|
|
61
80
|
study_denotations = study_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
|
62
81
|
reference_denotations = reference_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
|
63
82
|
|
64
|
-
|
83
|
+
mmatches = []
|
65
84
|
study_denotations.each do |s|
|
66
85
|
r_begin = reference_denotations.bsearch_index{|r| r[:span][:end] > s[:span][:begin]}
|
67
86
|
r_end = reference_denotations.bsearch_index{|r| r[:span][:begin] > s[:span][:end]}
|
68
87
|
r_end = r_end.nil? ? -1 : r_end - 1
|
69
88
|
reference_denotations[r_begin .. r_end].each do |r|
|
70
89
|
relatedness = get_relatedness_of_denotations(s, r, text)
|
71
|
-
|
90
|
+
mmatches << {study:s, reference:r, weight:relatedness} if relatedness > 0
|
72
91
|
end
|
73
92
|
end
|
74
93
|
|
75
|
-
|
94
|
+
mmatches
|
76
95
|
end
|
77
96
|
|
78
97
|
# To determine how well the two annotations match each other, based on the denotation match criteria
|
@@ -81,7 +100,7 @@ class PubannotationEvaluator
|
|
81
100
|
return 0 if s[:span][:end] <= r[:span][:begin] || s[:span][:begin] >= r[:span][:end]
|
82
101
|
|
83
102
|
# character-level tolerance
|
84
|
-
return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @
|
103
|
+
return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @soft_match_chatacters || (s[:span][:end] - r[:span][:end]).abs > @soft_match_chatacters
|
85
104
|
|
86
105
|
# word-level tolerance
|
87
106
|
front_mismatch = if s[:span][:begin] < r[:span][:begin]
|
@@ -89,19 +108,19 @@ class PubannotationEvaluator
|
|
89
108
|
else
|
90
109
|
text[r[:span][:begin] ... s[:span][:begin]]
|
91
110
|
end
|
92
|
-
return 0 if front_mismatch.count(' ') > @
|
111
|
+
return 0 if front_mismatch.count(' ') > @soft_match_words
|
93
112
|
|
94
113
|
rear_mismatch = if s[:span][:end] < r[:span][:end]
|
95
114
|
text[s[:span][:end] ... r[:span][:end]]
|
96
115
|
else
|
97
116
|
text[r[:span][:end] ... s[:span][:end]]
|
98
117
|
end
|
99
|
-
return 0 if rear_mismatch.count(' ') > @
|
118
|
+
return 0 if rear_mismatch.count(' ') > @soft_match_words
|
100
119
|
|
101
|
-
return s[:obj]
|
120
|
+
return @denotation_type_match.call(s[:obj], r[:obj])
|
102
121
|
end
|
103
122
|
|
104
|
-
def find_denotation_matches(
|
123
|
+
def find_denotation_matches(mmatches)
|
105
124
|
comp = Proc.new do |a, b|
|
106
125
|
c = a[:weight] <=> b[:weight]
|
107
126
|
if c.zero?
|
@@ -115,7 +134,7 @@ class PubannotationEvaluator
|
|
115
134
|
c
|
116
135
|
end
|
117
136
|
end
|
118
|
-
find_exclusive_matches(
|
137
|
+
find_exclusive_matches(mmatches, comp)
|
119
138
|
end
|
120
139
|
|
121
140
|
def compare_relations(study_relations, reference_relations, mmatch_denotations)
|
@@ -137,7 +156,7 @@ class PubannotationEvaluator
|
|
137
156
|
end
|
138
157
|
|
139
158
|
def get_relatedness_of_relations(s, r, mmatch_denotations)
|
140
|
-
# at least, the subject and object of the two
|
159
|
+
# At the very least, the subjects and the objects of the two relations must match each other.
|
141
160
|
match_subj = mmatch_denotations.find{|m| m[:study] && m[:reference] && m[:study][:id] == s[:subj] && m[:reference][:id] == r[:subj]}
|
142
161
|
return 0 if match_subj.nil?
|
143
162
|
|
@@ -145,7 +164,7 @@ class PubannotationEvaluator
|
|
145
164
|
return 0 if match_obj.nil?
|
146
165
|
|
147
166
|
# predicate match
|
148
|
-
match_pred_weight = s[:pred]
|
167
|
+
match_pred_weight = @relation_type_match.call(s[:pred], r[:pred])
|
149
168
|
|
150
169
|
return (match_subj[:weight] + match_obj[:weight] + match_pred_weight).to_f / 3
|
151
170
|
end
|
@@ -158,7 +177,7 @@ class PubannotationEvaluator
|
|
158
177
|
find_exclusive_matches(matches, comp)
|
159
178
|
end
|
160
179
|
|
161
|
-
|
180
|
+
# TODO: implement the comparison of modifications.
# Placeholder: for now it ignores all four arguments and always returns an empty Array.
|
162
181
|
def compare_modifications(study_modifications, reference_modifications, comparison_relations, compare_relations)
|
163
182
|
[]
|
164
183
|
end
|
@@ -200,31 +219,31 @@ class PubannotationEvaluator
|
|
200
219
|
def count(comparison)
|
201
220
|
# counts of denotations
|
202
221
|
count_study_denotations = begin
|
203
|
-
count =
|
222
|
+
count = {}
|
204
223
|
study_denotations = comparison.select{|m| m[:study] && m[:type]==:denotation}
|
205
224
|
study_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
|
206
225
|
count.update('All' => study_denotations.count)
|
207
226
|
end
|
208
227
|
|
209
228
|
count_reference_denotations = begin
|
210
|
-
count =
|
229
|
+
count = {}
|
211
230
|
reference_denotations = comparison.select{|m| m[:reference] && m[:type]==:denotation}
|
212
231
|
reference_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
|
213
232
|
count.update('All' => reference_denotations.count)
|
214
233
|
end
|
215
234
|
|
216
235
|
count_study_match_denotations = begin
|
217
|
-
count =
|
236
|
+
count = count_study_denotations.transform_values{|v| 0}
|
218
237
|
study_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
|
219
|
-
study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.
|
220
|
-
count.update('All' => study_match_denotations.
|
238
|
+
study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
239
|
+
count.update('All' => study_match_denotations.inject(0){|s, c| s+=c[:weight]})
|
221
240
|
end
|
222
241
|
|
223
242
|
count_reference_match_denotations = begin
|
224
|
-
count =
|
243
|
+
count = count_reference_denotations.transform_values{|v| 0}
|
225
244
|
reference_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
|
226
|
-
reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.
|
227
|
-
count.update('All' => reference_match_denotations.
|
245
|
+
reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
246
|
+
count.update('All' => reference_match_denotations.inject(0){|s, c| s+=c[:weight]})
|
228
247
|
end
|
229
248
|
|
230
249
|
counts = {
|
@@ -240,30 +259,30 @@ class PubannotationEvaluator
|
|
240
259
|
|
241
260
|
# counts of relations
|
242
261
|
count_study_relations = begin
|
243
|
-
count =
|
262
|
+
count = {}
|
244
263
|
study_relations = comparison.select{|m| m[:study] && m[:type]==:relation}
|
245
264
|
study_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
|
246
265
|
count.update('All' => study_relations.count)
|
247
266
|
end
|
248
267
|
|
249
268
|
count_reference_relations = begin
|
250
|
-
count =
|
269
|
+
count = {}
|
251
270
|
reference_relations = comparison.select{|m| m[:reference] && m[:type]==:relation}
|
252
271
|
reference_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
|
253
272
|
count.update('All' => reference_relations.count)
|
254
273
|
end
|
255
274
|
|
256
275
|
count_study_match_relations = begin
|
257
|
-
count =
|
276
|
+
count = count_study_relations.transform_values{|v| 0}
|
258
277
|
study_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
|
259
|
-
study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.
|
278
|
+
study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
260
279
|
count.update('All' => study_match_relations.count)
|
261
280
|
end
|
262
281
|
|
263
282
|
count_reference_match_relations = begin
|
264
|
-
count =
|
283
|
+
count = count_reference_relations.transform_values{|v| 0}
|
265
284
|
reference_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
|
266
|
-
reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.
|
285
|
+
reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
267
286
|
count.update('All' => reference_match_relations.count)
|
268
287
|
end
|
269
288
|
|
@@ -285,10 +304,21 @@ class PubannotationEvaluator
|
|
285
304
|
end
|
286
305
|
|
287
306
|
def get_prf(counts)
|
307
|
+
precision = counts[:study].keys.inject({}){|m, k| m.merge(k => counts[:matched_study][k].to_f / counts[:study][k]) if counts[:study][k] > 0}
|
308
|
+
recall = counts[:reference].keys.inject({}){|m, k| m.merge(k => counts[:matched_reference][k].to_f / counts[:reference][k]) if counts[:reference][k] > 0}
|
309
|
+
|
288
310
|
keys = (counts[:study].keys + counts[:reference].keys).uniq
|
289
|
-
|
290
|
-
|
291
|
-
|
311
|
+
fscore = keys.inject({}) do |m, k|
|
312
|
+
_p = precision[k]
|
313
|
+
_r = recall[k]
|
314
|
+
_f = if _p && _r
|
315
|
+
(_p + _r) > 0 ? 2.to_f * _p * _r / (_p + _r) : 0
|
316
|
+
else
|
317
|
+
_p ? _p : _r
|
318
|
+
end
|
319
|
+
m.merge(k => _f)
|
320
|
+
end
|
321
|
+
|
292
322
|
{
|
293
323
|
precision: precision,
|
294
324
|
recall: recall,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pubannotation_evaluator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A tool to evaluate the accuracy of a set of annotations.
|
14
14
|
email: jdkim@dbcls.rois.ac.jp
|