simple_bioc 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/simple_bioc/bioc_merger.rb +2 -37
- data/lib/simple_bioc/bioc_reader.rb +2 -0
- data/lib/simple_bioc/location_adjuster.rb +39 -0
- data/lib/simple_bioc/passage.rb +2 -0
- data/lib/simple_bioc/sentence.rb +2 -0
- data/lib/simple_bioc/version.rb +1 -1
- data/spec/file_check_spec.rb +10 -2
- data/xml/10330397_gene.xml +1 -0
- data/xml/10330397_ppimention.xml +1442 -0
- data/xml/merge/10366597_error.xml +3 -0
- data/xml/merge/output_10330397.xml +6447 -0
- data/xml/merge/output_10366597.xml +1360 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85e59da540b4e77a5802b72315e9a9a8e7c60a71
|
4
|
+
data.tar.gz: 5d0db6bfd1c2fd8fd3ea713de9f60aeff0c7b365
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db1b62532952cfc88237d359dc70d1c5aa0abbc6152455b2245f0229b4e8138985fc716adba6f9648e358de50342e5de984f48f52ea59a59560b3ebc46efd75a
|
7
|
+
data.tar.gz: 398312f95e42cd2f36371509a4a85a1c35bd670f60cb30b54556aa0fbb75810764578bb1ff726d5a140966480d6e64510ee28f93a047b146d48543362e4b3395
|
@@ -49,7 +49,7 @@ module BioCMerger
|
|
49
49
|
copy_text(s_d, s_s)
|
50
50
|
copy_relations(doc_d, s_d, s_s, id_map)
|
51
51
|
copy_annotations(doc_d, s_d, s_s, id_map)
|
52
|
-
|
52
|
+
s_d.adjust_annotation_offsets
|
53
53
|
end
|
54
54
|
elsif p_d.sentences.size == 0
|
55
55
|
p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
|
@@ -77,21 +77,11 @@ module BioCMerger
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
copy_annotations(doc_d, p_d, p_s, id_map)
|
80
|
-
|
80
|
+
p_d.adjust_annotation_offsets
|
81
81
|
end
|
82
82
|
puts warnings
|
83
83
|
end
|
84
84
|
|
85
|
-
def adjust_annotation_offset(obj)
|
86
|
-
return if obj.nil?
|
87
|
-
obj.annotations.each do |a|
|
88
|
-
positions = find_all_locations(obj, a.text)
|
89
|
-
a.locations.each do |l|
|
90
|
-
l.offset = choose_offset_candidate(l.offset, positions)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
85
|
def adjust_relation_refids(doc, id_map)
|
96
86
|
adjust_relation_refid(doc, id_map)
|
97
87
|
doc.passages.each do |p|
|
@@ -236,29 +226,4 @@ module BioCMerger
|
|
236
226
|
end
|
237
227
|
end
|
238
228
|
end
|
239
|
-
|
240
|
-
def find_all_locations(obj, text)
|
241
|
-
positions = []
|
242
|
-
return positions if obj.nil? || obj.text.nil?
|
243
|
-
pos = obj.text.index(text)
|
244
|
-
until pos.nil?
|
245
|
-
positions << (pos + obj.offset)
|
246
|
-
pos = obj.text.index(text, pos + 1)
|
247
|
-
end
|
248
|
-
return positions
|
249
|
-
end
|
250
|
-
|
251
|
-
def choose_offset_candidate(offset, positions)
|
252
|
-
min_diff = 99999
|
253
|
-
ret = offset
|
254
|
-
offset = offset.to_i
|
255
|
-
positions.each do |p|
|
256
|
-
diff = (offset - p).abs
|
257
|
-
if diff < min_diff
|
258
|
-
offset = p
|
259
|
-
min_diff = diff
|
260
|
-
end
|
261
|
-
end
|
262
|
-
return ret
|
263
|
-
end
|
264
229
|
end
|
@@ -79,6 +79,7 @@ module BioCReader
|
|
79
79
|
read_recursive(xml, passage, "sentence")
|
80
80
|
read_recursive(xml, passage, "annotation")
|
81
81
|
read_recursive(xml, passage, "relation")
|
82
|
+
passage.adjust_annotation_offsets
|
82
83
|
true
|
83
84
|
end
|
84
85
|
|
@@ -88,6 +89,7 @@ module BioCReader
|
|
88
89
|
read_infon(xml, sentence)
|
89
90
|
read_recursive(xml, sentence, "annotation")
|
90
91
|
read_recursive(xml, sentence, "relation")
|
92
|
+
sentence.adjust_annotation_offsets
|
91
93
|
true
|
92
94
|
end
|
93
95
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Mixin that re-aligns annotation location offsets with the text of the
# object it is mixed into.
#
# The including object must respond to +annotations+, +text+ and +offset+.
# Each annotation must respond to +text+ and +locations+, and each location
# to +offset+ / +offset=+.
module LocationAdjuster
  # A candidate position is only accepted when it lies strictly closer than
  # this many characters to the recorded offset.
  MAX_OFFSET_DISTANCE = 99_999

  # Rewrites the offset of every annotation location so that it points at the
  # occurrence of the annotation text (inside this object's text) closest to
  # the offset currently recorded. When the annotation text does not occur,
  # the recorded offset is kept, converted with +to_i+.
  #
  # @return [void]
  def adjust_annotation_offsets
    annotations.each do |annotation|
      positions = find_all_locations(self, annotation.text)
      annotation.locations.each do |location|
        location.offset = choose_offset_candidate(location.offset, positions)
      end
    end
  end

  # Everything below is callable both as LocationAdjuster.helper(...) and as a
  # private instance method of the including object.
  module_function

  # Collects the absolute position of every occurrence of +text+ in
  # +obj.text+, overlapping occurrences included.
  #
  # @param obj [#text, #offset, nil] owner of the searched text
  # @param text [String, nil] annotation text to look for
  # @return [Array<Integer>] positions shifted by +obj.offset+; empty when
  #   either text is missing or the annotation text is empty
  def find_all_locations(obj, text)
    positions = []
    return positions if obj.nil? || obj.text.nil?
    # String#index raises on nil, and an empty needle matches at every index,
    # which would produce meaningless candidates.
    return positions if text.nil? || text.empty?

    # Coerce like choose_offset_candidate does, so an offset stored as a
    # String cannot break the addition below.
    base = obj.offset.to_i
    pos = obj.text.index(text)
    while pos
      positions << (pos + base)
      pos = obj.text.index(text, pos + 1)
    end
    positions
  end

  # Picks the candidate position nearest to +offset+.
  #
  # @param offset [#to_i] offset currently recorded on the location
  # @param positions [Array<Integer>] candidate absolute positions
  # @return [Integer] the first nearest candidate, or +offset.to_i+ when there
  #   is no candidate closer than MAX_OFFSET_DISTANCE
  def choose_offset_candidate(offset, positions)
    offset = offset.to_i
    # min_by returns the first of several equally near candidates.
    nearest = positions.min_by { |candidate| (offset - candidate).abs }
    return offset if nearest.nil? || (offset - nearest).abs >= MAX_OFFSET_DISTANCE

    nearest
  end
end
|
data/lib/simple_bioc/passage.rb
CHANGED
data/lib/simple_bioc/sentence.rb
CHANGED
data/lib/simple_bioc/version.rb
CHANGED
data/spec/file_check_spec.rb
CHANGED
@@ -32,11 +32,19 @@ describe "File Check" do
|
|
32
32
|
|
33
33
|
SimpleBioC.merge(col1, col2)
|
34
34
|
output = SimpleBioC.to_xml(col1)
|
35
|
-
File.write("./xml/merge/
|
35
|
+
File.write("./xml/merge/output_10330397.xml", output)
|
36
36
|
puts "merge2"
|
37
|
-
col5 = SimpleBioC.from_xml("./xml/merge/
|
37
|
+
col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should fix location problem" do
|
41
|
+
col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
|
42
|
+
output = SimpleBioC.to_xml(col1)
|
43
|
+
File.write("./xml/merge/output_10366597.xml", output)
|
44
|
+
col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
|
38
45
|
end
|
39
46
|
|
47
|
+
|
40
48
|
it "should merge documents successfully with different order" do
|
41
49
|
col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
|
42
50
|
col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
|