simple_bioc 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3a4b12838e8618ee607ff56a7170cda15e0f242c
4
- data.tar.gz: 275bff96a672d49519d81b47fb97f00d1eda0ad4
3
+ metadata.gz: 85e59da540b4e77a5802b72315e9a9a8e7c60a71
4
+ data.tar.gz: 5d0db6bfd1c2fd8fd3ea713de9f60aeff0c7b365
5
5
  SHA512:
6
- metadata.gz: c6a49eada64585f46811c22d6e76f3791ab24b4f8ef961aac2c7ff6bf6e19d764c54f35f519d6fad2eddbeff324bc9819fdecb318579cc55fc022b7ee73b702b
7
- data.tar.gz: a835ff1a99236baf0ebcfca85069c1821b137cb9412feb873a277579cb55d1d052699e0782ccb9fddfdc7ed1c4caedc3b8be44d26d4066262c1d5492cc72e886
6
+ metadata.gz: db1b62532952cfc88237d359dc70d1c5aa0abbc6152455b2245f0229b4e8138985fc716adba6f9648e358de50342e5de984f48f52ea59a59560b3ebc46efd75a
7
+ data.tar.gz: 398312f95e42cd2f36371509a4a85a1c35bd670f60cb30b54556aa0fbb75810764578bb1ff726d5a140966480d6e64510ee28f93a047b146d48543362e4b3395
@@ -49,7 +49,7 @@ module BioCMerger
49
49
  copy_text(s_d, s_s)
50
50
  copy_relations(doc_d, s_d, s_s, id_map)
51
51
  copy_annotations(doc_d, s_d, s_s, id_map)
52
- adjust_annotation_offset(s_d)
52
+ s_d.adjust_annotation_offsets
53
53
  end
54
54
  elsif p_d.sentences.size == 0
55
55
  p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
@@ -77,21 +77,11 @@ module BioCMerger
77
77
  end
78
78
  end
79
79
  copy_annotations(doc_d, p_d, p_s, id_map)
80
- adjust_annotation_offset(p_d)
80
+ p_d.adjust_annotation_offsets
81
81
  end
82
82
  puts warnings
83
83
  end
84
84
 
85
- def adjust_annotation_offset(obj)
86
- return if obj.nil?
87
- obj.annotations.each do |a|
88
- positions = find_all_locations(obj, a.text)
89
- a.locations.each do |l|
90
- l.offset = choose_offset_candidate(l.offset, positions)
91
- end
92
- end
93
- end
94
-
95
85
  def adjust_relation_refids(doc, id_map)
96
86
  adjust_relation_refid(doc, id_map)
97
87
  doc.passages.each do |p|
@@ -236,29 +226,4 @@ module BioCMerger
236
226
  end
237
227
  end
238
228
  end
239
-
240
- def find_all_locations(obj, text)
241
- positions = []
242
- return positions if obj.nil? || obj.text.nil?
243
- pos = obj.text.index(text)
244
- until pos.nil?
245
- positions << (pos + obj.offset)
246
- pos = obj.text.index(text, pos + 1)
247
- end
248
- return positions
249
- end
250
-
251
- def choose_offset_candidate(offset, positions)
252
- min_diff = 99999
253
- ret = offset
254
- offset = offset.to_i
255
- positions.each do |p|
256
- diff = (offset - p).abs
257
- if diff < min_diff
258
- offset = p
259
- min_diff = diff
260
- end
261
- end
262
- return ret
263
- end
264
229
  end
@@ -79,6 +79,7 @@ module BioCReader
79
79
  read_recursive(xml, passage, "sentence")
80
80
  read_recursive(xml, passage, "annotation")
81
81
  read_recursive(xml, passage, "relation")
82
+ passage.adjust_annotation_offsets
82
83
  true
83
84
  end
84
85
 
@@ -88,6 +89,7 @@ module BioCReader
88
89
  read_infon(xml, sentence)
89
90
  read_recursive(xml, sentence, "annotation")
90
91
  read_recursive(xml, sentence, "relation")
92
+ sentence.adjust_annotation_offsets
91
93
  true
92
94
  end
93
95
 
@@ -0,0 +1,39 @@
1
+ module LocationAdjuster
2
+ def adjust_annotation_offsets
3
+ obj = self
4
+ return if obj.nil?
5
+ obj.annotations.each do |a|
6
+ positions = find_all_locations(obj, a.text)
7
+ a.locations.each do |l|
8
+ l.offset = choose_offset_candidate(l.offset, positions)
9
+ end
10
+ end
11
+ end
12
+
13
+ module_function
14
+
15
+ def find_all_locations(obj, text)
16
+ positions = []
17
+ return positions if obj.nil? || obj.text.nil?
18
+ pos = obj.text.index(text)
19
+ until pos.nil?
20
+ positions << (pos + obj.offset)
21
+ pos = obj.text.index(text, pos + 1)
22
+ end
23
+ return positions
24
+ end
25
+
26
+ def choose_offset_candidate(offset, positions)
27
+ min_diff = 99999
28
+ offset = offset.to_i
29
+ ret = offset
30
+ positions.each do |p|
31
+ diff = (offset - p).abs
32
+ if diff < min_diff
33
+ ret = p
34
+ min_diff = diff
35
+ end
36
+ end
37
+ return ret
38
+ end
39
+ end
@@ -1,5 +1,7 @@
1
1
  module SimpleBioC
2
2
  class Passage
3
+ include LocationAdjuster
4
+
3
5
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
4
6
  attr_reader :document
5
7
 
@@ -1,5 +1,7 @@
1
1
  module SimpleBioC
2
2
  class Sentence
3
+ include LocationAdjuster
4
+
3
5
  attr_accessor :offset, :text, :infons, :annotations, :relations
4
6
  attr_reader :passage
5
7
 
@@ -1,3 +1,3 @@
1
1
  module SimpleBioC
2
- VERSION = "0.0.11"
2
+ VERSION = "0.0.12"
3
3
  end
@@ -32,11 +32,19 @@ describe "File Check" do
32
32
 
33
33
  SimpleBioC.merge(col1, col2)
34
34
  output = SimpleBioC.to_xml(col1)
35
- File.write("./xml/merge/output.xml", output)
35
+ File.write("./xml/merge/output_10330397.xml", output)
36
36
  puts "merge2"
37
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
37
+ col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
38
+ end
39
+
40
+ it "should fix location problem" do
41
+ col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
42
+ output = SimpleBioC.to_xml(col1)
43
+ File.write("./xml/merge/output_10366597.xml", output)
44
+ col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
38
45
  end
39
46
 
47
+
40
48
  it "should merge documents successfully with different order" do
41
49
  col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
42
50
  col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")