pubannotation_evaluator 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 469a3def4423b8d7fb83f678b13dfd387ff8c2bd0f3a6267253f7ca30f688c86
4
- data.tar.gz: e4b34328fd5658234227701e00440f9a0f7f3492cef0e47eb03ea3203f9a9001
3
+ metadata.gz: 1f492f0c865db6673ec836d33c24b4d22f73f26e0f957ced3e679cd7b8fe9ae5
4
+ data.tar.gz: 01be9be7175842fdb5f217a2870b2734b58c912520c593dd990c2d6783a85fbd
5
5
  SHA512:
6
- metadata.gz: a360dd7c5d6b3b519c898d2047219570cd1fbcb239af72d09fffee02a69be7b1d888a2070e0b2d5defd190abe3394f99fa2591ac38fd0254b16d60e6cc9a20f9
7
- data.tar.gz: bddde4792bda694d88b200893d22d8f8ff993b48f38068ee797b28abf652602c994f3d857f3082d45fd25a1f3652dd49e45600de87c9eb0f3337dedeb310d84c
6
+ metadata.gz: ffaf46d1e897243a6ff7e87e14797a36e20d6847519797be33ba1a4074cce46fb167de0e942e3288ce5e5f6cbf9f859edf2db76b9fb0fc62693b50397b0bbc9b
7
+ data.tar.gz: 0cef53db1ee03d022f0c182e6e1babcec850ec86d18d636ebd8f42d1491732d2aead1279ba81cd6d7b265193a7019f832990b5229ba35008392d73868ca32869
@@ -3,15 +3,40 @@ require 'pubannotation_evaluator'
3
3
  require 'json'
4
4
 
5
5
  rdir = nil
6
+ verbose = false
7
+
8
+ soft_match_characters = PubannotationEvaluator::SOFT_MATCH_CHARACTERS
9
+ soft_match_words = PubannotationEvaluator::SOFT_MATCH_WORDS
10
+ denotation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
11
+ relation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
6
12
 
7
13
  ## command line option processing
8
14
  require 'optparse'
9
15
  optparse = OptionParser.new do |opts|
10
16
  opts.banner = "Usage: pubannotation-eval.rb [options] annotation_file(s)"
11
17
 
12
- opts.on('-r', '--rdir=directory', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
18
+ opts.on('-r', '--rdir=DIR', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
13
19
  rdir = dir
20
+ end
21
+
22
+ opts.on('-c', '--soft-match-characters=INT', "specifies the number of characters to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_CHARACTERS}).") do |i|
23
+ soft_match_characters = i.to_i
24
+ end
14
25
 
26
+ opts.on('-w', '--soft-match-words=INT', "specifies the number of words to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_WORDS}).") do |i|
27
+ soft_match_words = i.to_i
28
+ end
29
+
30
+ opts.on('-D', '--denotation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
31
+ denotation_type_match = b
32
+ end
33
+
34
+ opts.on('-R', '--relation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (defalut='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
35
+ relation_type_match = b
36
+ end
37
+
38
+ opts.on('-v', '--verbose', "tells it to report false positives and false negatives.") do
39
+ verbose = true
15
40
  end
16
41
 
17
42
  opts.on('-h', '--help', 'displays this screen.') do
@@ -27,7 +52,7 @@ if ARGV.length == 0 || rdir.nil?
27
52
  exit
28
53
  end
29
54
 
30
- evaluator = PubannotationEvaluator.new
55
+ evaluator = PubannotationEvaluator.new(soft_match_characters, soft_match_words, denotation_type_match, relation_type_match)
31
56
 
32
57
  comparison = ARGV.inject([]) do |col, filepath|
33
58
  if File.extname(filepath) == '.json'
@@ -53,6 +78,12 @@ end
53
78
 
54
79
  evaluation = evaluator.evaluate(comparison)
55
80
 
56
- false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
57
- false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
58
- puts JSON.generate(evaluation.merge(false_positives:false_positives, false_negatives:false_negatives))
81
+ if verbose
82
+ false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
83
+ evaluation[:false_positives] = false_positives unless false_positives.empty?
84
+
85
+ false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
86
+ evaluation[:false_negatives] = false_negatives unless false_negatives.empty?
87
+ end
88
+
89
+ puts JSON.generate(evaluation)
@@ -1,10 +1,21 @@
1
1
  class PubannotationEvaluator
2
- BOUNDARY_SOFTNESS_CHARACTER = 20
3
- BOUNDARY_SOFTNESS_WORD = 2
4
-
5
- def initialize(boundary_softness_character = BOUNDARY_SOFTNESS_CHARACTER, boundary_softness_word = BOUNDARY_SOFTNESS_WORD)
6
- @boundary_softness_character = boundary_softness_character
7
- @boundary_softness_word = boundary_softness_word
2
+ SOFT_MATCH_CHARACTERS = 20
3
+ SOFT_MATCH_WORDS = 2
4
+ EXACT_TYPE_MATCH = 'study_type == reference_type ? 1 : 0'
5
+
6
+ def initialize(soft_match_chatacters = SOFT_MATCH_CHARACTERS, soft_match_words = SOFT_MATCH_WORDS, denotation_type_match = EXACT_TYPE_MATCH, relation_type_match = EXACT_TYPE_MATCH)
7
+ @soft_match_chatacters = soft_match_chatacters
8
+ @soft_match_words = soft_match_words
9
+ @denotation_type_match = eval <<-HEREDOC
10
+ Proc.new do |study_type, reference_type|
11
+ #{denotation_type_match}
12
+ end
13
+ HEREDOC
14
+ @relation_type_match = eval <<-HEREDOC
15
+ Proc.new do |study_type, reference_type|
16
+ #{relation_type_match}
17
+ end
18
+ HEREDOC
8
19
  end
9
20
 
10
21
  # To compare two sets of annotations
@@ -45,6 +56,14 @@ class PubannotationEvaluator
45
56
  {counts:counts, measures:measures}
46
57
  end
47
58
 
59
+ def get_false_positives(comparison, project_name)
60
+ comparison.select{|m| m[:study] && m[:reference].nil?}
61
+ end
62
+
63
+ def get_false_negatives(comparison, project_name)
64
+ comparison.select{|m| m[:study].nil? && m[:reference]}
65
+ end
66
+
48
67
  private
49
68
 
50
69
  def compare_denotations(study_denotations, reference_denotations, text)
@@ -61,18 +80,18 @@ class PubannotationEvaluator
61
80
  study_denotations = study_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
62
81
  reference_denotations = reference_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
63
82
 
64
- matches = []
83
+ mmatches = []
65
84
  study_denotations.each do |s|
66
85
  r_begin = reference_denotations.bsearch_index{|r| r[:span][:end] > s[:span][:begin]}
67
86
  r_end = reference_denotations.bsearch_index{|r| r[:span][:begin] > s[:span][:end]}
68
87
  r_end = r_end.nil? ? -1 : r_end - 1
69
88
  reference_denotations[r_begin .. r_end].each do |r|
70
89
  relatedness = get_relatedness_of_denotations(s, r, text)
71
- matches << {study:s, reference:r, weight:relatedness} if relatedness > 0
90
+ mmatches << {study:s, reference:r, weight:relatedness} if relatedness > 0
72
91
  end
73
92
  end
74
93
 
75
- matches
94
+ mmatches
76
95
  end
77
96
 
78
97
  # To determine how much the two annotations match each other, based on the denotation match criteria
@@ -81,7 +100,7 @@ class PubannotationEvaluator
81
100
  return 0 if s[:span][:end] <= r[:span][:begin] || s[:span][:begin] >= r[:span][:end]
82
101
 
83
102
  # character-level tolerance
84
- return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @boundary_softness_character || (s[:span][:end] - r[:span][:end]).abs > @boundary_softness_character
103
+ return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @soft_match_chatacters || (s[:span][:end] - r[:span][:end]).abs > @soft_match_chatacters
85
104
 
86
105
  # word-level tolerance
87
106
  front_mismatch = if s[:span][:begin] < r[:span][:begin]
@@ -89,19 +108,19 @@ class PubannotationEvaluator
89
108
  else
90
109
  text[r[:span][:begin] ... s[:span][:begin]]
91
110
  end
92
- return 0 if front_mismatch.count(' ') > @boundary_softness_word
111
+ return 0 if front_mismatch.count(' ') > @soft_match_words
93
112
 
94
113
  rear_mismatch = if s[:span][:end] < r[:span][:end]
95
114
  text[s[:span][:end] ... r[:span][:end]]
96
115
  else
97
116
  text[r[:span][:end] ... s[:span][:end]]
98
117
  end
99
- return 0 if rear_mismatch.count(' ') > @boundary_softness_word
118
+ return 0 if rear_mismatch.count(' ') > @soft_match_words
100
119
 
101
- return s[:obj] == r[:obj] ? 1 : 0.5
120
+ return @denotation_type_match.call(s[:obj], r[:obj])
102
121
  end
103
122
 
104
- def find_denotation_matches(matches)
123
+ def find_denotation_matches(mmatches)
105
124
  comp = Proc.new do |a, b|
106
125
  c = a[:weight] <=> b[:weight]
107
126
  if c.zero?
@@ -115,7 +134,7 @@ class PubannotationEvaluator
115
134
  c
116
135
  end
117
136
  end
118
- find_exclusive_matches(matches, comp)
137
+ find_exclusive_matches(mmatches, comp)
119
138
  end
120
139
 
121
140
  def compare_relations(study_relations, reference_relations, mmatch_denotations)
@@ -137,7 +156,7 @@ class PubannotationEvaluator
137
156
  end
138
157
 
139
158
  def get_relatedness_of_relations(s, r, mmatch_denotations)
140
- # at least, the subject and object of the two relateions should match to each other.
159
+ # at the least, the subject and object of the two relations should match each other.
141
160
  match_subj = mmatch_denotations.find{|m| m[:study] && m[:reference] && m[:study][:id] == s[:subj] && m[:reference][:id] == r[:subj]}
142
161
  return 0 if match_subj.nil?
143
162
 
@@ -145,7 +164,7 @@ class PubannotationEvaluator
145
164
  return 0 if match_obj.nil?
146
165
 
147
166
  # predicate match
148
- match_pred_weight = s[:pred] == r[:pred] ? 1 : 0
167
+ match_pred_weight = @relation_type_match.call(s[:pred], r[:pred])
149
168
 
150
169
  return (match_subj[:weight] + match_obj[:weight] + match_pred_weight).to_f / 3
151
170
  end
@@ -158,7 +177,7 @@ class PubannotationEvaluator
158
177
  find_exclusive_matches(matches, comp)
159
178
  end
160
179
 
161
-
180
+ # TODO: implement the comparison of modifications (currently a stub that returns an empty array)
162
181
  def compare_modifications(study_modifications, reference_modifications, comparison_relations, compare_relations)
163
182
  []
164
183
  end
@@ -200,31 +219,31 @@ class PubannotationEvaluator
200
219
  def count(comparison)
201
220
  # counts of denotations
202
221
  count_study_denotations = begin
203
- count = Hash.new(0)
222
+ count = {}
204
223
  study_denotations = comparison.select{|m| m[:study] && m[:type]==:denotation}
205
224
  study_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
206
225
  count.update('All' => study_denotations.count)
207
226
  end
208
227
 
209
228
  count_reference_denotations = begin
210
- count = Hash.new(0)
229
+ count = {}
211
230
  reference_denotations = comparison.select{|m| m[:reference] && m[:type]==:denotation}
212
231
  reference_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
213
232
  count.update('All' => reference_denotations.count)
214
233
  end
215
234
 
216
235
  count_study_match_denotations = begin
217
- count = Hash.new(0)
236
+ count = count_study_denotations.transform_values{|v| 0}
218
237
  study_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
219
- study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
220
- count.update('All' => study_match_denotations.count)
238
+ study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
239
+ count.update('All' => study_match_denotations.inject(0){|s, c| s+=c[:weight]})
221
240
  end
222
241
 
223
242
  count_reference_match_denotations = begin
224
- count = Hash.new(0)
243
+ count = count_reference_denotations.transform_values{|v| 0}
225
244
  reference_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
226
- reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
227
- count.update('All' => reference_match_denotations.count)
245
+ reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
246
+ count.update('All' => reference_match_denotations.inject(0){|s, c| s+=c[:weight]})
228
247
  end
229
248
 
230
249
  counts = {
@@ -240,30 +259,30 @@ class PubannotationEvaluator
240
259
 
241
260
  # counts of relations
242
261
  count_study_relations = begin
243
- count = Hash.new(0)
262
+ count = {}
244
263
  study_relations = comparison.select{|m| m[:study] && m[:type]==:relation}
245
264
  study_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
246
265
  count.update('All' => study_relations.count)
247
266
  end
248
267
 
249
268
  count_reference_relations = begin
250
- count = Hash.new(0)
269
+ count = {}
251
270
  reference_relations = comparison.select{|m| m[:reference] && m[:type]==:relation}
252
271
  reference_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
253
272
  count.update('All' => reference_relations.count)
254
273
  end
255
274
 
256
275
  count_study_match_relations = begin
257
- count = Hash.new(0)
276
+ count = count_study_relations.transform_values{|v| 0}
258
277
  study_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
259
- study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
278
+ study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
260
279
  count.update('All' => study_match_relations.count)
261
280
  end
262
281
 
263
282
  count_reference_match_relations = begin
264
- count = Hash.new(0)
283
+ count = count_reference_relations.transform_values{|v| 0}
265
284
  reference_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
266
- reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
285
+ reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
267
286
  count.update('All' => reference_match_relations.count)
268
287
  end
269
288
 
@@ -285,10 +304,21 @@ class PubannotationEvaluator
285
304
  end
286
305
 
287
306
  def get_prf(counts)
307
+ precision = counts[:study].keys.inject({}){|m, k| m.merge(k => counts[:matched_study][k].to_f / counts[:study][k]) if counts[:study][k] > 0}
308
+ recall = counts[:reference].keys.inject({}){|m, k| m.merge(k => counts[:matched_reference][k].to_f / counts[:reference][k]) if counts[:reference][k] > 0}
309
+
288
310
  keys = (counts[:study].keys + counts[:reference].keys).uniq
289
- precision = keys.inject(Hash.new(0)){|m, k| m[k] = counts[:study][k] > 0 ? counts[:matched_study][k].to_f / counts[:study][k] : 0; m}
290
- recall = keys.inject(Hash.new(0)){|m, k| m[k] = counts[:reference][k] > 0 ? counts[:matched_reference][k].to_f / counts[:reference][k] : 0; m}
291
- fscore = keys.inject(Hash.new(0)){|m, k| p = precision[k]; r = recall[k]; m[k] = (p * r) > 0 ? 2.to_f * p * r / (p + r) : 0; m}
311
+ fscore = keys.inject({}) do |m, k|
312
+ _p = precision[k]
313
+ _r = recall[k]
314
+ _f = if _p && _r
315
+ (_p + _r) > 0 ? 2.to_f * _p * _r / (_p + _r) : 0
316
+ else
317
+ _p ? _p : _r
318
+ end
319
+ m.merge(k => _f)
320
+ end
321
+
292
322
  {
293
323
  precision: precision,
294
324
  recall: recall,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pubannotation_evaluator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-30 00:00:00.000000000 Z
11
+ date: 2019-04-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A tool to evaluate the accuracy of a set of annotations.
14
14
  email: jdkim@dbcls.rois.ac.jp