simple_bioc 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3a4b12838e8618ee607ff56a7170cda15e0f242c
4
- data.tar.gz: 275bff96a672d49519d81b47fb97f00d1eda0ad4
3
+ metadata.gz: 85e59da540b4e77a5802b72315e9a9a8e7c60a71
4
+ data.tar.gz: 5d0db6bfd1c2fd8fd3ea713de9f60aeff0c7b365
5
5
  SHA512:
6
- metadata.gz: c6a49eada64585f46811c22d6e76f3791ab24b4f8ef961aac2c7ff6bf6e19d764c54f35f519d6fad2eddbeff324bc9819fdecb318579cc55fc022b7ee73b702b
7
- data.tar.gz: a835ff1a99236baf0ebcfca85069c1821b137cb9412feb873a277579cb55d1d052699e0782ccb9fddfdc7ed1c4caedc3b8be44d26d4066262c1d5492cc72e886
6
+ metadata.gz: db1b62532952cfc88237d359dc70d1c5aa0abbc6152455b2245f0229b4e8138985fc716adba6f9648e358de50342e5de984f48f52ea59a59560b3ebc46efd75a
7
+ data.tar.gz: 398312f95e42cd2f36371509a4a85a1c35bd670f60cb30b54556aa0fbb75810764578bb1ff726d5a140966480d6e64510ee28f93a047b146d48543362e4b3395
@@ -49,7 +49,7 @@ module BioCMerger
49
49
  copy_text(s_d, s_s)
50
50
  copy_relations(doc_d, s_d, s_s, id_map)
51
51
  copy_annotations(doc_d, s_d, s_s, id_map)
52
- adjust_annotation_offset(s_d)
52
+ s_d.adjust_annotation_offsets
53
53
  end
54
54
  elsif p_d.sentences.size == 0
55
55
  p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
@@ -77,21 +77,11 @@ module BioCMerger
77
77
  end
78
78
  end
79
79
  copy_annotations(doc_d, p_d, p_s, id_map)
80
- adjust_annotation_offset(p_d)
80
+ p_d.adjust_annotation_offsets
81
81
  end
82
82
  puts warnings
83
83
  end
84
84
 
85
- def adjust_annotation_offset(obj)
86
- return if obj.nil?
87
- obj.annotations.each do |a|
88
- positions = find_all_locations(obj, a.text)
89
- a.locations.each do |l|
90
- l.offset = choose_offset_candidate(l.offset, positions)
91
- end
92
- end
93
- end
94
-
95
85
  def adjust_relation_refids(doc, id_map)
96
86
  adjust_relation_refid(doc, id_map)
97
87
  doc.passages.each do |p|
@@ -236,29 +226,4 @@ module BioCMerger
236
226
  end
237
227
  end
238
228
  end
239
-
240
- def find_all_locations(obj, text)
241
- positions = []
242
- return positions if obj.nil? || obj.text.nil?
243
- pos = obj.text.index(text)
244
- until pos.nil?
245
- positions << (pos + obj.offset)
246
- pos = obj.text.index(text, pos + 1)
247
- end
248
- return positions
249
- end
250
-
251
- def choose_offset_candidate(offset, positions)
252
- min_diff = 99999
253
- ret = offset
254
- offset = offset.to_i
255
- positions.each do |p|
256
- diff = (offset - p).abs
257
- if diff < min_diff
258
- offset = p
259
- min_diff = diff
260
- end
261
- end
262
- return ret
263
- end
264
229
  end
@@ -79,6 +79,7 @@ module BioCReader
79
79
  read_recursive(xml, passage, "sentence")
80
80
  read_recursive(xml, passage, "annotation")
81
81
  read_recursive(xml, passage, "relation")
82
+ passage.adjust_annotation_offsets
82
83
  true
83
84
  end
84
85
 
@@ -88,6 +89,7 @@ module BioCReader
88
89
  read_infon(xml, sentence)
89
90
  read_recursive(xml, sentence, "annotation")
90
91
  read_recursive(xml, sentence, "relation")
92
+ sentence.adjust_annotation_offsets
91
93
  true
92
94
  end
93
95
 
@@ -0,0 +1,39 @@
1
+ module LocationAdjuster
2
+ def adjust_annotation_offsets
3
+ obj = self
4
+ return if obj.nil?
5
+ obj.annotations.each do |a|
6
+ positions = find_all_locations(obj, a.text)
7
+ a.locations.each do |l|
8
+ l.offset = choose_offset_candidate(l.offset, positions)
9
+ end
10
+ end
11
+ end
12
+
13
+ module_function
14
+
15
+ def find_all_locations(obj, text)
16
+ positions = []
17
+ return positions if obj.nil? || obj.text.nil?
18
+ pos = obj.text.index(text)
19
+ until pos.nil?
20
+ positions << (pos + obj.offset)
21
+ pos = obj.text.index(text, pos + 1)
22
+ end
23
+ return positions
24
+ end
25
+
26
+ def choose_offset_candidate(offset, positions)
27
+ min_diff = 99999
28
+ offset = offset.to_i
29
+ ret = offset
30
+ positions.each do |p|
31
+ diff = (offset - p).abs
32
+ if diff < min_diff
33
+ ret = p
34
+ min_diff = diff
35
+ end
36
+ end
37
+ return ret
38
+ end
39
+ end
@@ -1,5 +1,7 @@
1
1
  module SimpleBioC
2
2
  class Passage
3
+ include LocationAdjuster
4
+
3
5
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
4
6
  attr_reader :document
5
7
 
@@ -1,5 +1,7 @@
1
1
  module SimpleBioC
2
2
  class Sentence
3
+ include LocationAdjuster
4
+
3
5
  attr_accessor :offset, :text, :infons, :annotations, :relations
4
6
  attr_reader :passage
5
7
 
@@ -1,3 +1,3 @@
1
1
  module SimpleBioC
2
- VERSION = "0.0.11"
2
+ VERSION = "0.0.12"
3
3
  end
@@ -32,11 +32,19 @@ describe "File Check" do
32
32
 
33
33
  SimpleBioC.merge(col1, col2)
34
34
  output = SimpleBioC.to_xml(col1)
35
- File.write("./xml/merge/output.xml", output)
35
+ File.write("./xml/merge/output_10330397.xml", output)
36
36
  puts "merge2"
37
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
37
+ col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
38
+ end
39
+
40
+ it "should fix location problem" do
41
+ col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
42
+ output = SimpleBioC.to_xml(col1)
43
+ File.write("./xml/merge/output_10366597.xml", output)
44
+ col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
38
45
  end
39
46
 
47
+
40
48
  it "should merge documents successfully with different order" do
41
49
  col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
42
50
  col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")