pubannotation_evaluator 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/pubannotation-eval +36 -5
- data/lib/pubannotation_evaluator/pubannotation_evaluator.rb +65 -35
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f492f0c865db6673ec836d33c24b4d22f73f26e0f957ced3e679cd7b8fe9ae5
|
4
|
+
data.tar.gz: 01be9be7175842fdb5f217a2870b2734b58c912520c593dd990c2d6783a85fbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffaf46d1e897243a6ff7e87e14797a36e20d6847519797be33ba1a4074cce46fb167de0e942e3288ce5e5f6cbf9f859edf2db76b9fb0fc62693b50397b0bbc9b
|
7
|
+
data.tar.gz: 0cef53db1ee03d022f0c182e6e1babcec850ec86d18d636ebd8f42d1491732d2aead1279ba81cd6d7b265193a7019f832990b5229ba35008392d73868ca32869
|
data/bin/pubannotation-eval
CHANGED
@@ -3,15 +3,40 @@ require 'pubannotation_evaluator'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
rdir = nil
|
6
|
+
verbose = false
|
7
|
+
|
8
|
+
soft_match_characters = PubannotationEvaluator::SOFT_MATCH_CHARACTERS
|
9
|
+
soft_match_words = PubannotationEvaluator::SOFT_MATCH_WORDS
|
10
|
+
denotation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
|
11
|
+
relation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
|
6
12
|
|
7
13
|
## command line option processing
|
8
14
|
require 'optparse'
|
9
15
|
optparse = OptionParser.new do |opts|
|
10
16
|
opts.banner = "Usage: pubannotation-eval.rb [options] annotation_file(s)"
|
11
17
|
|
12
|
-
opts.on('-r', '--rdir=
|
18
|
+
opts.on('-r', '--rdir=DIR', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
|
13
19
|
rdir = dir
|
20
|
+
end
|
21
|
+
|
22
|
+
opts.on('-c', '--soft-match-characters=INT', "specifies the number of characters to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_CHARACTERS}).") do |i|
|
23
|
+
soft_match_characters = i.to_i
|
24
|
+
end
|
14
25
|
|
26
|
+
opts.on('-w', '--soft-match-words=INT', "specifies the number of words to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_WORDS}).") do |i|
|
27
|
+
soft_match_words = i.to_i
|
28
|
+
end
|
29
|
+
|
30
|
+
opts.on('-D', '--denotation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
|
31
|
+
denotation_type_match = b
|
32
|
+
end
|
33
|
+
|
34
|
+
opts.on('-R', '--relation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
|
35
|
+
relation_type_match = b
|
36
|
+
end
|
37
|
+
|
38
|
+
opts.on('-v', '--verbose', "tells it to report false positives and false negatives.") do
|
39
|
+
verbose = true
|
15
40
|
end
|
16
41
|
|
17
42
|
opts.on('-h', '--help', 'displays this screen.') do
|
@@ -27,7 +52,7 @@ if ARGV.length == 0 || rdir.nil?
|
|
27
52
|
exit
|
28
53
|
end
|
29
54
|
|
30
|
-
evaluator = PubannotationEvaluator.new
|
55
|
+
evaluator = PubannotationEvaluator.new(soft_match_characters, soft_match_words, denotation_type_match, relation_type_match)
|
31
56
|
|
32
57
|
comparison = ARGV.inject([]) do |col, filepath|
|
33
58
|
if File.extname(filepath) == '.json'
|
@@ -53,6 +78,12 @@ end
|
|
53
78
|
|
54
79
|
evaluation = evaluator.evaluate(comparison)
|
55
80
|
|
56
|
-
|
57
|
-
|
58
|
-
|
81
|
+
if verbose
|
82
|
+
false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
|
83
|
+
evaluation[:false_positives] = false_positives unless false_positives.empty?
|
84
|
+
|
85
|
+
false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
|
86
|
+
evaluation[:false_negatives] = false_negatives unless false_negatives.empty?
|
87
|
+
end
|
88
|
+
|
89
|
+
puts JSON.generate(evaluation)
|
@@ -1,10 +1,21 @@
|
|
1
1
|
class PubannotationEvaluator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
@
|
2
|
+
SOFT_MATCH_CHARACTERS = 20
|
3
|
+
SOFT_MATCH_WORDS = 2
|
4
|
+
EXACT_TYPE_MATCH = 'study_type == reference_type ? 1 : 0'
|
5
|
+
|
6
|
+
def initialize(soft_match_chatacters = SOFT_MATCH_CHARACTERS, soft_match_words = SOFT_MATCH_WORDS, denotation_type_match = EXACT_TYPE_MATCH, relation_type_match = EXACT_TYPE_MATCH)
|
7
|
+
@soft_match_chatacters = soft_match_chatacters
|
8
|
+
@soft_match_words = soft_match_words
|
9
|
+
@denotation_type_match = eval <<-HEREDOC
|
10
|
+
Proc.new do |study_type, reference_type|
|
11
|
+
#{denotation_type_match}
|
12
|
+
end
|
13
|
+
HEREDOC
|
14
|
+
@relation_type_match = eval <<-HEREDOC
|
15
|
+
Proc.new do |study_type, reference_type|
|
16
|
+
#{relation_type_match}
|
17
|
+
end
|
18
|
+
HEREDOC
|
8
19
|
end
|
9
20
|
|
10
21
|
# To compare two sets of annotations
|
@@ -45,6 +56,14 @@ class PubannotationEvaluator
|
|
45
56
|
{counts:counts, measures:measures}
|
46
57
|
end
|
47
58
|
|
59
|
+
def get_false_positives(comparison, project_name)
|
60
|
+
comparison.select{|m| m[:study] && m[:reference].nil?}
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_false_negatives(comparison, project_name)
|
64
|
+
comparison.select{|m| m[:study].nil? && m[:reference]}
|
65
|
+
end
|
66
|
+
|
48
67
|
private
|
49
68
|
|
50
69
|
def compare_denotations(study_denotations, reference_denotations, text)
|
@@ -61,18 +80,18 @@ class PubannotationEvaluator
|
|
61
80
|
study_denotations = study_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
|
62
81
|
reference_denotations = reference_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
|
63
82
|
|
64
|
-
|
83
|
+
mmatches = []
|
65
84
|
study_denotations.each do |s|
|
66
85
|
r_begin = reference_denotations.bsearch_index{|r| r[:span][:end] > s[:span][:begin]}
|
67
86
|
r_end = reference_denotations.bsearch_index{|r| r[:span][:begin] > s[:span][:end]}
|
68
87
|
r_end = r_end.nil? ? -1 : r_end - 1
|
69
88
|
reference_denotations[r_begin .. r_end].each do |r|
|
70
89
|
relatedness = get_relatedness_of_denotations(s, r, text)
|
71
|
-
|
90
|
+
mmatches << {study:s, reference:r, weight:relatedness} if relatedness > 0
|
72
91
|
end
|
73
92
|
end
|
74
93
|
|
75
|
-
|
94
|
+
mmatches
|
76
95
|
end
|
77
96
|
|
78
97
|
# To determine how much the two annotations match to each other based on the denotation match criteria
|
@@ -81,7 +100,7 @@ class PubannotationEvaluator
|
|
81
100
|
return 0 if s[:span][:end] <= r[:span][:begin] || s[:span][:begin] >= r[:span][:end]
|
82
101
|
|
83
102
|
# character-level tolerance
|
84
|
-
return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @
|
103
|
+
return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @soft_match_chatacters || (s[:span][:end] - r[:span][:end]).abs > @soft_match_chatacters
|
85
104
|
|
86
105
|
# word-level tolerance
|
87
106
|
front_mismatch = if s[:span][:begin] < r[:span][:begin]
|
@@ -89,19 +108,19 @@ class PubannotationEvaluator
|
|
89
108
|
else
|
90
109
|
text[r[:span][:begin] ... s[:span][:begin]]
|
91
110
|
end
|
92
|
-
return 0 if front_mismatch.count(' ') > @
|
111
|
+
return 0 if front_mismatch.count(' ') > @soft_match_words
|
93
112
|
|
94
113
|
rear_mismatch = if s[:span][:end] < r[:span][:end]
|
95
114
|
text[s[:span][:end] ... r[:span][:end]]
|
96
115
|
else
|
97
116
|
text[r[:span][:end] ... s[:span][:end]]
|
98
117
|
end
|
99
|
-
return 0 if rear_mismatch.count(' ') > @
|
118
|
+
return 0 if rear_mismatch.count(' ') > @soft_match_words
|
100
119
|
|
101
|
-
return s[:obj]
|
120
|
+
return @denotation_type_match.call(s[:obj], r[:obj])
|
102
121
|
end
|
103
122
|
|
104
|
-
def find_denotation_matches(
|
123
|
+
def find_denotation_matches(mmatches)
|
105
124
|
comp = Proc.new do |a, b|
|
106
125
|
c = a[:weight] <=> b[:weight]
|
107
126
|
if c.zero?
|
@@ -115,7 +134,7 @@ class PubannotationEvaluator
|
|
115
134
|
c
|
116
135
|
end
|
117
136
|
end
|
118
|
-
find_exclusive_matches(
|
137
|
+
find_exclusive_matches(mmatches, comp)
|
119
138
|
end
|
120
139
|
|
121
140
|
def compare_relations(study_relations, reference_relations, mmatch_denotations)
|
@@ -137,7 +156,7 @@ class PubannotationEvaluator
|
|
137
156
|
end
|
138
157
|
|
139
158
|
def get_relatedness_of_relations(s, r, mmatch_denotations)
|
140
|
-
# at least, the subject and object of the two
|
159
|
+
# at least, the subject and object of the two relations should match to each other.
|
141
160
|
match_subj = mmatch_denotations.find{|m| m[:study] && m[:reference] && m[:study][:id] == s[:subj] && m[:reference][:id] == r[:subj]}
|
142
161
|
return 0 if match_subj.nil?
|
143
162
|
|
@@ -145,7 +164,7 @@ class PubannotationEvaluator
|
|
145
164
|
return 0 if match_obj.nil?
|
146
165
|
|
147
166
|
# predicate match
|
148
|
-
match_pred_weight = s[:pred]
|
167
|
+
match_pred_weight = @relation_type_match.call(s[:pred], r[:pred])
|
149
168
|
|
150
169
|
return (match_subj[:weight] + match_obj[:weight] + match_pred_weight).to_f / 3
|
151
170
|
end
|
@@ -158,7 +177,7 @@ class PubannotationEvaluator
|
|
158
177
|
find_exclusive_matches(matches, comp)
|
159
178
|
end
|
160
179
|
|
161
|
-
|
180
|
+
# TODO: to implement it
|
162
181
|
def compare_modifications(study_modifications, reference_modifications, comparison_relations, compare_relations)
|
163
182
|
[]
|
164
183
|
end
|
@@ -200,31 +219,31 @@ class PubannotationEvaluator
|
|
200
219
|
def count(comparison)
|
201
220
|
# counts of denotations
|
202
221
|
count_study_denotations = begin
|
203
|
-
count =
|
222
|
+
count = {}
|
204
223
|
study_denotations = comparison.select{|m| m[:study] && m[:type]==:denotation}
|
205
224
|
study_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
|
206
225
|
count.update('All' => study_denotations.count)
|
207
226
|
end
|
208
227
|
|
209
228
|
count_reference_denotations = begin
|
210
|
-
count =
|
229
|
+
count = {}
|
211
230
|
reference_denotations = comparison.select{|m| m[:reference] && m[:type]==:denotation}
|
212
231
|
reference_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
|
213
232
|
count.update('All' => reference_denotations.count)
|
214
233
|
end
|
215
234
|
|
216
235
|
count_study_match_denotations = begin
|
217
|
-
count =
|
236
|
+
count = count_study_denotations.transform_values{|v| 0}
|
218
237
|
study_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
|
219
|
-
study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.
|
220
|
-
count.update('All' => study_match_denotations.
|
238
|
+
study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
239
|
+
count.update('All' => study_match_denotations.inject(0){|s, c| s+=c[:weight]})
|
221
240
|
end
|
222
241
|
|
223
242
|
count_reference_match_denotations = begin
|
224
|
-
count =
|
243
|
+
count = count_reference_denotations.transform_values{|v| 0}
|
225
244
|
reference_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
|
226
|
-
reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.
|
227
|
-
count.update('All' => reference_match_denotations.
|
245
|
+
reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
246
|
+
count.update('All' => reference_match_denotations.inject(0){|s, c| s+=c[:weight]})
|
228
247
|
end
|
229
248
|
|
230
249
|
counts = {
|
@@ -240,30 +259,30 @@ class PubannotationEvaluator
|
|
240
259
|
|
241
260
|
# counts of relations
|
242
261
|
count_study_relations = begin
|
243
|
-
count =
|
262
|
+
count = {}
|
244
263
|
study_relations = comparison.select{|m| m[:study] && m[:type]==:relation}
|
245
264
|
study_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
|
246
265
|
count.update('All' => study_relations.count)
|
247
266
|
end
|
248
267
|
|
249
268
|
count_reference_relations = begin
|
250
|
-
count =
|
269
|
+
count = {}
|
251
270
|
reference_relations = comparison.select{|m| m[:reference] && m[:type]==:relation}
|
252
271
|
reference_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
|
253
272
|
count.update('All' => reference_relations.count)
|
254
273
|
end
|
255
274
|
|
256
275
|
count_study_match_relations = begin
|
257
|
-
count =
|
276
|
+
count = count_study_relations.transform_values{|v| 0}
|
258
277
|
study_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
|
259
|
-
study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.
|
278
|
+
study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
260
279
|
count.update('All' => study_match_relations.count)
|
261
280
|
end
|
262
281
|
|
263
282
|
count_reference_match_relations = begin
|
264
|
-
count =
|
283
|
+
count = count_reference_relations.transform_values{|v| 0}
|
265
284
|
reference_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
|
266
|
-
reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.
|
285
|
+
reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
|
267
286
|
count.update('All' => reference_match_relations.count)
|
268
287
|
end
|
269
288
|
|
@@ -285,10 +304,21 @@ class PubannotationEvaluator
|
|
285
304
|
end
|
286
305
|
|
287
306
|
def get_prf(counts)
|
307
|
+
precision = counts[:study].keys.inject({}){|m, k| m.merge(k => counts[:matched_study][k].to_f / counts[:study][k]) if counts[:study][k] > 0}
|
308
|
+
recall = counts[:reference].keys.inject({}){|m, k| m.merge(k => counts[:matched_reference][k].to_f / counts[:reference][k]) if counts[:reference][k] > 0}
|
309
|
+
|
288
310
|
keys = (counts[:study].keys + counts[:reference].keys).uniq
|
289
|
-
|
290
|
-
|
291
|
-
|
311
|
+
fscore = keys.inject({}) do |m, k|
|
312
|
+
_p = precision[k]
|
313
|
+
_r = recall[k]
|
314
|
+
_f = if _p && _r
|
315
|
+
(_p + _r) > 0 ? 2.to_f * _p * _r / (_p + _r) : 0
|
316
|
+
else
|
317
|
+
_p ? _p : _r
|
318
|
+
end
|
319
|
+
m.merge(k => _f)
|
320
|
+
end
|
321
|
+
|
292
322
|
{
|
293
323
|
precision: precision,
|
294
324
|
recall: recall,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pubannotation_evaluator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A tool to evaluate the accuracy of a set of annotations.
|
14
14
|
email: jdkim@dbcls.rois.ac.jp
|