simple_bioc 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/simple_bioc/bioc_merger.rb +2 -37
- data/lib/simple_bioc/bioc_reader.rb +2 -0
- data/lib/simple_bioc/location_adjuster.rb +39 -0
- data/lib/simple_bioc/passage.rb +2 -0
- data/lib/simple_bioc/sentence.rb +2 -0
- data/lib/simple_bioc/version.rb +1 -1
- data/spec/file_check_spec.rb +10 -2
- data/xml/10330397_gene.xml +1 -0
- data/xml/10330397_ppimention.xml +1442 -0
- data/xml/merge/10366597_error.xml +3 -0
- data/xml/merge/output_10330397.xml +6447 -0
- data/xml/merge/output_10366597.xml +1360 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85e59da540b4e77a5802b72315e9a9a8e7c60a71
|
4
|
+
data.tar.gz: 5d0db6bfd1c2fd8fd3ea713de9f60aeff0c7b365
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db1b62532952cfc88237d359dc70d1c5aa0abbc6152455b2245f0229b4e8138985fc716adba6f9648e358de50342e5de984f48f52ea59a59560b3ebc46efd75a
|
7
|
+
data.tar.gz: 398312f95e42cd2f36371509a4a85a1c35bd670f60cb30b54556aa0fbb75810764578bb1ff726d5a140966480d6e64510ee28f93a047b146d48543362e4b3395
|
@@ -49,7 +49,7 @@ module BioCMerger
|
|
49
49
|
copy_text(s_d, s_s)
|
50
50
|
copy_relations(doc_d, s_d, s_s, id_map)
|
51
51
|
copy_annotations(doc_d, s_d, s_s, id_map)
|
52
|
-
|
52
|
+
s_d.adjust_annotation_offsets
|
53
53
|
end
|
54
54
|
elsif p_d.sentences.size == 0
|
55
55
|
p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
|
@@ -77,21 +77,11 @@ module BioCMerger
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
copy_annotations(doc_d, p_d, p_s, id_map)
|
80
|
-
|
80
|
+
p_d.adjust_annotation_offsets
|
81
81
|
end
|
82
82
|
puts warnings
|
83
83
|
end
|
84
84
|
|
85
|
-
def adjust_annotation_offset(obj)
|
86
|
-
return if obj.nil?
|
87
|
-
obj.annotations.each do |a|
|
88
|
-
positions = find_all_locations(obj, a.text)
|
89
|
-
a.locations.each do |l|
|
90
|
-
l.offset = choose_offset_candidate(l.offset, positions)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
85
|
def adjust_relation_refids(doc, id_map)
|
96
86
|
adjust_relation_refid(doc, id_map)
|
97
87
|
doc.passages.each do |p|
|
@@ -236,29 +226,4 @@ module BioCMerger
|
|
236
226
|
end
|
237
227
|
end
|
238
228
|
end
|
239
|
-
|
240
|
-
def find_all_locations(obj, text)
|
241
|
-
positions = []
|
242
|
-
return positions if obj.nil? || obj.text.nil?
|
243
|
-
pos = obj.text.index(text)
|
244
|
-
until pos.nil?
|
245
|
-
positions << (pos + obj.offset)
|
246
|
-
pos = obj.text.index(text, pos + 1)
|
247
|
-
end
|
248
|
-
return positions
|
249
|
-
end
|
250
|
-
|
251
|
-
def choose_offset_candidate(offset, positions)
|
252
|
-
min_diff = 99999
|
253
|
-
ret = offset
|
254
|
-
offset = offset.to_i
|
255
|
-
positions.each do |p|
|
256
|
-
diff = (offset - p).abs
|
257
|
-
if diff < min_diff
|
258
|
-
offset = p
|
259
|
-
min_diff = diff
|
260
|
-
end
|
261
|
-
end
|
262
|
-
return ret
|
263
|
-
end
|
264
229
|
end
|
@@ -79,6 +79,7 @@ module BioCReader
|
|
79
79
|
read_recursive(xml, passage, "sentence")
|
80
80
|
read_recursive(xml, passage, "annotation")
|
81
81
|
read_recursive(xml, passage, "relation")
|
82
|
+
passage.adjust_annotation_offsets
|
82
83
|
true
|
83
84
|
end
|
84
85
|
|
@@ -88,6 +89,7 @@ module BioCReader
|
|
88
89
|
read_infon(xml, sentence)
|
89
90
|
read_recursive(xml, sentence, "annotation")
|
90
91
|
read_recursive(xml, sentence, "relation")
|
92
|
+
sentence.adjust_annotation_offsets
|
91
93
|
true
|
92
94
|
end
|
93
95
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module LocationAdjuster
|
2
|
+
def adjust_annotation_offsets
|
3
|
+
obj = self
|
4
|
+
return if obj.nil?
|
5
|
+
obj.annotations.each do |a|
|
6
|
+
positions = find_all_locations(obj, a.text)
|
7
|
+
a.locations.each do |l|
|
8
|
+
l.offset = choose_offset_candidate(l.offset, positions)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
module_function
|
14
|
+
|
15
|
+
def find_all_locations(obj, text)
|
16
|
+
positions = []
|
17
|
+
return positions if obj.nil? || obj.text.nil?
|
18
|
+
pos = obj.text.index(text)
|
19
|
+
until pos.nil?
|
20
|
+
positions << (pos + obj.offset)
|
21
|
+
pos = obj.text.index(text, pos + 1)
|
22
|
+
end
|
23
|
+
return positions
|
24
|
+
end
|
25
|
+
|
26
|
+
def choose_offset_candidate(offset, positions)
|
27
|
+
min_diff = 99999
|
28
|
+
offset = offset.to_i
|
29
|
+
ret = offset
|
30
|
+
positions.each do |p|
|
31
|
+
diff = (offset - p).abs
|
32
|
+
if diff < min_diff
|
33
|
+
ret = p
|
34
|
+
min_diff = diff
|
35
|
+
end
|
36
|
+
end
|
37
|
+
return ret
|
38
|
+
end
|
39
|
+
end
|
data/lib/simple_bioc/passage.rb
CHANGED
data/lib/simple_bioc/sentence.rb
CHANGED
data/lib/simple_bioc/version.rb
CHANGED
data/spec/file_check_spec.rb
CHANGED
@@ -32,11 +32,19 @@ describe "File Check" do
|
|
32
32
|
|
33
33
|
SimpleBioC.merge(col1, col2)
|
34
34
|
output = SimpleBioC.to_xml(col1)
|
35
|
-
File.write("./xml/merge/
|
35
|
+
File.write("./xml/merge/output_10330397.xml", output)
|
36
36
|
puts "merge2"
|
37
|
-
col5 = SimpleBioC.from_xml("./xml/merge/
|
37
|
+
col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should fix location problem" do
|
41
|
+
col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
|
42
|
+
output = SimpleBioC.to_xml(col1)
|
43
|
+
File.write("./xml/merge/output_10366597.xml", output)
|
44
|
+
col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
|
38
45
|
end
|
39
46
|
|
47
|
+
|
40
48
|
it "should merge documents successfully with different order" do
|
41
49
|
col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
|
42
50
|
col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
|