pubannotation_evaluator 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 469a3def4423b8d7fb83f678b13dfd387ff8c2bd0f3a6267253f7ca30f688c86
4
- data.tar.gz: e4b34328fd5658234227701e00440f9a0f7f3492cef0e47eb03ea3203f9a9001
3
+ metadata.gz: 1f492f0c865db6673ec836d33c24b4d22f73f26e0f957ced3e679cd7b8fe9ae5
4
+ data.tar.gz: 01be9be7175842fdb5f217a2870b2734b58c912520c593dd990c2d6783a85fbd
5
5
  SHA512:
6
- metadata.gz: a360dd7c5d6b3b519c898d2047219570cd1fbcb239af72d09fffee02a69be7b1d888a2070e0b2d5defd190abe3394f99fa2591ac38fd0254b16d60e6cc9a20f9
7
- data.tar.gz: bddde4792bda694d88b200893d22d8f8ff993b48f38068ee797b28abf652602c994f3d857f3082d45fd25a1f3652dd49e45600de87c9eb0f3337dedeb310d84c
6
+ metadata.gz: ffaf46d1e897243a6ff7e87e14797a36e20d6847519797be33ba1a4074cce46fb167de0e942e3288ce5e5f6cbf9f859edf2db76b9fb0fc62693b50397b0bbc9b
7
+ data.tar.gz: 0cef53db1ee03d022f0c182e6e1babcec850ec86d18d636ebd8f42d1491732d2aead1279ba81cd6d7b265193a7019f832990b5229ba35008392d73868ca32869
@@ -3,15 +3,40 @@ require 'pubannotation_evaluator'
3
3
  require 'json'
4
4
 
5
5
  rdir = nil
6
+ verbose = false
7
+
8
+ soft_match_characters = PubannotationEvaluator::SOFT_MATCH_CHARACTERS
9
+ soft_match_words = PubannotationEvaluator::SOFT_MATCH_WORDS
10
+ denotation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
11
+ relation_type_match = PubannotationEvaluator::EXACT_TYPE_MATCH
6
12
 
7
13
  ## command line option processing
8
14
  require 'optparse'
9
15
  optparse = OptionParser.new do |opts|
10
16
  opts.banner = "Usage: pubannotation-eval.rb [options] annotation_file(s)"
11
17
 
12
- opts.on('-r', '--rdir=directory', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
18
+ opts.on('-r', '--rdir=DIR', 'specifies the path to the directory of reference annotation_file(s).') do |dir|
13
19
  rdir = dir
20
+ end
21
+
22
+ opts.on('-c', '--soft-match-characters=INT', "specifies the number of characters to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_CHARACTERS}).") do |i|
23
+ soft_match_characters = i.to_i
24
+ end
14
25
 
26
+ opts.on('-w', '--soft-match-words=INT', "specifies the number of words to allow for boundary mismatch (default=#{PubannotationEvaluator::SOFT_MATCH_WORDS}).") do |i|
27
+ soft_match_words = i.to_i
28
+ end
29
+
30
+ opts.on('-D', '--denotation-type-match=TEXT', "specifies a ruby block to determine type match of two denotations (default='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
31
+ denotation_type_match = b
32
+ end
33
+
34
+ opts.on('-R', '--relation-type-match=TEXT', "specifies a ruby block to determine type match of two relations (default='#{PubannotationEvaluator::EXACT_TYPE_MATCH}').") do |b|
35
+ relation_type_match = b
36
+ end
37
+
38
+ opts.on('-v', '--verbose', "tells it to report false positives and false negatives.") do
39
+ verbose = true
15
40
  end
16
41
 
17
42
  opts.on('-h', '--help', 'displays this screen.') do
@@ -27,7 +52,7 @@ if ARGV.length == 0 || rdir.nil?
27
52
  exit
28
53
  end
29
54
 
30
- evaluator = PubannotationEvaluator.new
55
+ evaluator = PubannotationEvaluator.new(soft_match_characters, soft_match_words, denotation_type_match, relation_type_match)
31
56
 
32
57
  comparison = ARGV.inject([]) do |col, filepath|
33
58
  if File.extname(filepath) == '.json'
@@ -53,6 +78,12 @@ end
53
78
 
54
79
  evaluation = evaluator.evaluate(comparison)
55
80
 
56
- false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
57
- false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
58
- puts JSON.generate(evaluation.merge(false_positives:false_positives, false_negatives:false_negatives))
81
+ if verbose
82
+ false_positives = comparison.select{|m| m[:study] && m[:reference].nil?}
83
+ evaluation[:false_positives] = false_positives unless false_positives.empty?
84
+
85
+ false_negatives = comparison.select{|m| m[:study].nil? && m[:reference]}
86
+ evaluation[:false_negatives] = false_negatives unless false_negatives.empty?
87
+ end
88
+
89
+ puts JSON.generate(evaluation)
@@ -1,10 +1,21 @@
1
1
  class PubannotationEvaluator
2
- BOUNDARY_SOFTNESS_CHARACTER = 20
3
- BOUNDARY_SOFTNESS_WORD = 2
4
-
5
- def initialize(boundary_softness_character = BOUNDARY_SOFTNESS_CHARACTER, boundary_softness_word = BOUNDARY_SOFTNESS_WORD)
6
- @boundary_softness_character = boundary_softness_character
7
- @boundary_softness_word = boundary_softness_word
2
+ SOFT_MATCH_CHARACTERS = 20
3
+ SOFT_MATCH_WORDS = 2
4
+ EXACT_TYPE_MATCH = 'study_type == reference_type ? 1 : 0'
5
+
6
+ def initialize(soft_match_chatacters = SOFT_MATCH_CHARACTERS, soft_match_words = SOFT_MATCH_WORDS, denotation_type_match = EXACT_TYPE_MATCH, relation_type_match = EXACT_TYPE_MATCH)
7
+ @soft_match_chatacters = soft_match_chatacters
8
+ @soft_match_words = soft_match_words
9
+ @denotation_type_match = eval <<-HEREDOC
10
+ Proc.new do |study_type, reference_type|
11
+ #{denotation_type_match}
12
+ end
13
+ HEREDOC
14
+ @relation_type_match = eval <<-HEREDOC
15
+ Proc.new do |study_type, reference_type|
16
+ #{relation_type_match}
17
+ end
18
+ HEREDOC
8
19
  end
9
20
 
10
21
  # To compare two sets of annotations
@@ -45,6 +56,14 @@ class PubannotationEvaluator
45
56
  {counts:counts, measures:measures}
46
57
  end
47
58
 
59
+ def get_false_positives(comparison, project_name)
60
+ comparison.select{|m| m[:study] && m[:reference].nil?}
61
+ end
62
+
63
+ def get_false_negatives(comparison, project_name)
64
+ comparison.select{|m| m[:study].nil? && m[:reference]}
65
+ end
66
+
48
67
  private
49
68
 
50
69
  def compare_denotations(study_denotations, reference_denotations, text)
@@ -61,18 +80,18 @@ class PubannotationEvaluator
61
80
  study_denotations = study_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
62
81
  reference_denotations = reference_denotations.sort_by{|d| [d[:span][:begin], -d[:span][:end]]}
63
82
 
64
- matches = []
83
+ mmatches = []
65
84
  study_denotations.each do |s|
66
85
  r_begin = reference_denotations.bsearch_index{|r| r[:span][:end] > s[:span][:begin]}
67
86
  r_end = reference_denotations.bsearch_index{|r| r[:span][:begin] > s[:span][:end]}
68
87
  r_end = r_end.nil? ? -1 : r_end - 1
69
88
  reference_denotations[r_begin .. r_end].each do |r|
70
89
  relatedness = get_relatedness_of_denotations(s, r, text)
71
- matches << {study:s, reference:r, weight:relatedness} if relatedness > 0
90
+ mmatches << {study:s, reference:r, weight:relatedness} if relatedness > 0
72
91
  end
73
92
  end
74
93
 
75
- matches
94
+ mmatches
76
95
  end
77
96
 
78
97
  # To determine how closely the two annotations match each other based on the denotation match criteria
@@ -81,7 +100,7 @@ class PubannotationEvaluator
81
100
  return 0 if s[:span][:end] <= r[:span][:begin] || s[:span][:begin] >= r[:span][:end]
82
101
 
83
102
  # character-level tolerance
84
- return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @boundary_softness_character || (s[:span][:end] - r[:span][:end]).abs > @boundary_softness_character
103
+ return 0 if (s[:span][:begin] - r[:span][:begin]).abs > @soft_match_chatacters || (s[:span][:end] - r[:span][:end]).abs > @soft_match_chatacters
85
104
 
86
105
  # word-level tolerance
87
106
  front_mismatch = if s[:span][:begin] < r[:span][:begin]
@@ -89,19 +108,19 @@ class PubannotationEvaluator
89
108
  else
90
109
  text[r[:span][:begin] ... s[:span][:begin]]
91
110
  end
92
- return 0 if front_mismatch.count(' ') > @boundary_softness_word
111
+ return 0 if front_mismatch.count(' ') > @soft_match_words
93
112
 
94
113
  rear_mismatch = if s[:span][:end] < r[:span][:end]
95
114
  text[s[:span][:end] ... r[:span][:end]]
96
115
  else
97
116
  text[r[:span][:end] ... s[:span][:end]]
98
117
  end
99
- return 0 if rear_mismatch.count(' ') > @boundary_softness_word
118
+ return 0 if rear_mismatch.count(' ') > @soft_match_words
100
119
 
101
- return s[:obj] == r[:obj] ? 1 : 0.5
120
+ return @denotation_type_match.call(s[:obj], r[:obj])
102
121
  end
103
122
 
104
- def find_denotation_matches(matches)
123
+ def find_denotation_matches(mmatches)
105
124
  comp = Proc.new do |a, b|
106
125
  c = a[:weight] <=> b[:weight]
107
126
  if c.zero?
@@ -115,7 +134,7 @@ class PubannotationEvaluator
115
134
  c
116
135
  end
117
136
  end
118
- find_exclusive_matches(matches, comp)
137
+ find_exclusive_matches(mmatches, comp)
119
138
  end
120
139
 
121
140
  def compare_relations(study_relations, reference_relations, mmatch_denotations)
@@ -137,7 +156,7 @@ class PubannotationEvaluator
137
156
  end
138
157
 
139
158
  def get_relatedness_of_relations(s, r, mmatch_denotations)
140
- # at least, the subject and object of the two relateions should match to each other.
159
+ # at least, the subject and object of the two relations should match to each other.
141
160
  match_subj = mmatch_denotations.find{|m| m[:study] && m[:reference] && m[:study][:id] == s[:subj] && m[:reference][:id] == r[:subj]}
142
161
  return 0 if match_subj.nil?
143
162
 
@@ -145,7 +164,7 @@ class PubannotationEvaluator
145
164
  return 0 if match_obj.nil?
146
165
 
147
166
  # predicate match
148
- match_pred_weight = s[:pred] == r[:pred] ? 1 : 0
167
+ match_pred_weight = @relation_type_match.call(s[:pred], r[:pred])
149
168
 
150
169
  return (match_subj[:weight] + match_obj[:weight] + match_pred_weight).to_f / 3
151
170
  end
@@ -158,7 +177,7 @@ class PubannotationEvaluator
158
177
  find_exclusive_matches(matches, comp)
159
178
  end
160
179
 
161
-
180
+ # TODO: implement the comparison of modifications; this stub currently returns an empty list.
162
181
  def compare_modifications(study_modifications, reference_modifications, comparison_relations, compare_relations)
163
182
  []
164
183
  end
@@ -200,31 +219,31 @@ class PubannotationEvaluator
200
219
  def count(comparison)
201
220
  # counts of denotations
202
221
  count_study_denotations = begin
203
- count = Hash.new(0)
222
+ count = {}
204
223
  study_denotations = comparison.select{|m| m[:study] && m[:type]==:denotation}
205
224
  study_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
206
225
  count.update('All' => study_denotations.count)
207
226
  end
208
227
 
209
228
  count_reference_denotations = begin
210
- count = Hash.new(0)
229
+ count = {}
211
230
  reference_denotations = comparison.select{|m| m[:reference] && m[:type]==:denotation}
212
231
  reference_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
213
232
  count.update('All' => reference_denotations.count)
214
233
  end
215
234
 
216
235
  count_study_match_denotations = begin
217
- count = Hash.new(0)
236
+ count = count_study_denotations.transform_values{|v| 0}
218
237
  study_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
219
- study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.count}
220
- count.update('All' => study_match_denotations.count)
238
+ study_match_denotations.group_by{|m| m[:study][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
239
+ count.update('All' => study_match_denotations.inject(0){|s, c| s+=c[:weight]})
221
240
  end
222
241
 
223
242
  count_reference_match_denotations = begin
224
- count = Hash.new(0)
243
+ count = count_reference_denotations.transform_values{|v| 0}
225
244
  reference_match_denotations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:denotation}
226
- reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.count}
227
- count.update('All' => reference_match_denotations.count)
245
+ reference_match_denotations.group_by{|m| m[:reference][:obj]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
246
+ count.update('All' => reference_match_denotations.inject(0){|s, c| s+=c[:weight]})
228
247
  end
229
248
 
230
249
  counts = {
@@ -240,30 +259,30 @@ class PubannotationEvaluator
240
259
 
241
260
  # counts of relations
242
261
  count_study_relations = begin
243
- count = Hash.new(0)
262
+ count = {}
244
263
  study_relations = comparison.select{|m| m[:study] && m[:type]==:relation}
245
264
  study_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
246
265
  count.update('All' => study_relations.count)
247
266
  end
248
267
 
249
268
  count_reference_relations = begin
250
- count = Hash.new(0)
269
+ count = {}
251
270
  reference_relations = comparison.select{|m| m[:reference] && m[:type]==:relation}
252
271
  reference_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
253
272
  count.update('All' => reference_relations.count)
254
273
  end
255
274
 
256
275
  count_study_match_relations = begin
257
- count = Hash.new(0)
276
+ count = count_study_relations.transform_values{|v| 0}
258
277
  study_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
259
- study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.count}
278
+ study_match_relations.group_by{|m| m[:study][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
260
279
  count.update('All' => study_match_relations.count)
261
280
  end
262
281
 
263
282
  count_reference_match_relations = begin
264
- count = Hash.new(0)
283
+ count = count_reference_relations.transform_values{|v| 0}
265
284
  reference_match_relations = comparison.select{|m| m[:study] && m[:reference] && m[:type]==:relation}
266
- reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.count}
285
+ reference_match_relations.group_by{|m| m[:reference][:pred]}.each{|k, m| count[k] = m.inject(0){|s, c| s+=c[:weight]}}
267
286
  count.update('All' => reference_match_relations.count)
268
287
  end
269
288
 
@@ -285,10 +304,21 @@ class PubannotationEvaluator
285
304
  end
286
305
 
287
306
  def get_prf(counts)
307
+ precision = counts[:study].keys.inject({}){|m, k| m.merge(k => counts[:matched_study][k].to_f / counts[:study][k]) if counts[:study][k] > 0}
308
+ recall = counts[:reference].keys.inject({}){|m, k| m.merge(k => counts[:matched_reference][k].to_f / counts[:reference][k]) if counts[:reference][k] > 0}
309
+
288
310
  keys = (counts[:study].keys + counts[:reference].keys).uniq
289
- precision = keys.inject(Hash.new(0)){|m, k| m[k] = counts[:study][k] > 0 ? counts[:matched_study][k].to_f / counts[:study][k] : 0; m}
290
- recall = keys.inject(Hash.new(0)){|m, k| m[k] = counts[:reference][k] > 0 ? counts[:matched_reference][k].to_f / counts[:reference][k] : 0; m}
291
- fscore = keys.inject(Hash.new(0)){|m, k| p = precision[k]; r = recall[k]; m[k] = (p * r) > 0 ? 2.to_f * p * r / (p + r) : 0; m}
311
+ fscore = keys.inject({}) do |m, k|
312
+ _p = precision[k]
313
+ _r = recall[k]
314
+ _f = if _p && _r
315
+ (_p + _r) > 0 ? 2.to_f * _p * _r / (_p + _r) : 0
316
+ else
317
+ _p ? _p : _r
318
+ end
319
+ m.merge(k => _f)
320
+ end
321
+
292
322
  {
293
323
  precision: precision,
294
324
  recall: recall,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pubannotation_evaluator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-30 00:00:00.000000000 Z
11
+ date: 2019-04-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A tool to evaluate the accuracy of a set of annotations.
14
14
  email: jdkim@dbcls.rois.ac.jp