simple_bioc 0.0.20 → 0.0.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9861d717fec1b45ab297b2ed93275e77be0c34ee
4
- data.tar.gz: 974019424b72ab4bf3bd87d9444c6f2b04b009b4
3
+ metadata.gz: 344d68b7d1e117a05da96939c1ace1ff10cb630f
4
+ data.tar.gz: 549cfc037ec425cfa53815238aab5628044346c6
5
5
  SHA512:
6
- metadata.gz: 32cf4b3b3f4110098dc780abc434a3f9625e68aa5f98b198537e4672febd6700e215b8cbec7e7c67c7f3c86b4752ac191b88aedc3a25ca50ae2b38102bcc374e
7
- data.tar.gz: 8a982d871c4660d6ceb06f20ce54f96987048b9b7bbc4dc24bb43e9604817b888672f060f9cf2c91308a10646c99350c63008b1c4ae9ec046d6e3eb6707e1714
6
+ metadata.gz: e19302d994c531a1fe9ac2407a48c4517fbef8a6f0671bbc82a2e06e381613d0d48205b705ca372ddd9d8f63282b1845ac5775aec7bf603ed9ddc87fd3a599e9
7
+ data.tar.gz: fa6a72ce6773ee3ac3726be469e3bfa0b3fc808827f4da62926fe7fca7da67eb962fbf3ee4863a456ffc063e32dfcb77c9489efa0d4d5c77e47110bee34224f0
@@ -84,7 +84,6 @@ module BioCReader
84
84
  read_recursive(xml, passage, "sentence")
85
85
  read_recursive(xml, passage, "annotation")
86
86
  read_recursive(xml, passage, "relation")
87
- passage.adjust_annotation_offsets
88
87
  true
89
88
  end
90
89
 
@@ -94,7 +93,6 @@ module BioCReader
94
93
  read_infon(xml, sentence)
95
94
  read_recursive(xml, sentence, "annotation")
96
95
  read_recursive(xml, sentence, "relation")
97
- sentence.adjust_annotation_offsets
98
96
  true
99
97
  end
100
98
 
@@ -1,8 +1,5 @@
1
- require 'simple_bioc/location_adjuster'
2
1
  module SimpleBioC
3
2
  class Passage
4
- include LocationAdjuster
5
-
6
3
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
7
4
  attr_reader :document
8
5
 
@@ -1,8 +1,5 @@
1
- require 'simple_bioc/location_adjuster'
2
1
  module SimpleBioC
3
2
  class Sentence
4
- include LocationAdjuster
5
-
6
3
  attr_accessor :offset, :text, :infons, :annotations, :relations
7
4
  attr_reader :passage
8
5
 
@@ -1,3 +1,3 @@
1
1
  module SimpleBioC
2
- VERSION = "0.0.20"
2
+ VERSION = "0.0.21"
3
3
  end
data/lib/simple_bioc.rb CHANGED
@@ -42,10 +42,6 @@ module SimpleBioC
42
42
  BioCReader.read_from_file_or_string(file, options)
43
43
  end
44
44
 
45
- def merge(dest_bioc, src_bioc)
46
- return BioCMerger.merge(dest_bioc, src_bioc)
47
- end
48
-
49
45
  # parse a BioC XML string and convert it into a collection instance
50
46
  #
51
47
  # ==== Arguments
@@ -11,62 +11,10 @@ describe "File Check" do
11
11
  end
12
12
  end
13
13
 
14
- it "should merge documents successfully" do
15
- col1 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
16
- col2 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
17
- col3 = SimpleBioC.from_xml("./xml/merge/9864355_2.xml")
18
- col4 = SimpleBioC.from_xml("./xml/merge/9864355_3.xml")
19
-
20
- SimpleBioC.merge(col1, col2)
21
- SimpleBioC.merge(col1, col3)
22
- SimpleBioC.merge(col1, col4)
23
- output = SimpleBioC.to_xml(col1)
24
- File.write("./xml/merge/output.xml", output)
25
- puts "merge1"
26
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
27
- end
28
-
29
- it "should merge same documents successfully" do
30
- col1 = SimpleBioC.from_xml("./xml/10330397_gene.xml")
31
- col2 = SimpleBioC.from_xml("./xml/10330397_ppimention.xml")
32
-
33
- SimpleBioC.merge(col1, col2)
34
- output = SimpleBioC.to_xml(col1)
35
- File.write("./xml/merge/output_10330397.xml", output)
36
- puts "merge2"
37
- col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
38
- end
39
-
40
14
  it "should fix location problem" do
41
15
  col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
42
16
  output = SimpleBioC.to_xml(col1)
43
17
  File.write("./xml/merge/output_10366597.xml", output)
44
18
  col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
45
19
  end
46
-
47
-
48
- it "should merge documents successfully with different order" do
49
- col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
50
- col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
51
- col1 = SimpleBioC.from_xml("./xml/merge/9864355_2.xml")
52
- col2 = SimpleBioC.from_xml("./xml/merge/9864355_3.xml")
53
-
54
- SimpleBioC.merge(col1, col2)
55
- SimpleBioC.merge(col1, col3)
56
- SimpleBioC.merge(col1, col4)
57
- output = SimpleBioC.to_xml(col1)
58
- File.write("./xml/merge/output.xml", output)
59
- puts "merge12"
60
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
61
- end
62
-
63
- it "should load XML files successfully" do
64
- Dir["./xml/*.xml"].each do |file_path|
65
- puts "self-merge #{file_path}"
66
- collection1 = SimpleBioC.from_xml(file_path)
67
- collection2 = SimpleBioC.from_xml(file_path)
68
- SimpleBioC.merge(collection1, collection2)
69
- output = SimpleBioC.to_xml(collection1)
70
- end
71
- end
72
20
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_bioc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.20
4
+ version: 0.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dongseop Kwon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-04 00:00:00.000000000 Z
11
+ date: 2016-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -185,13 +185,11 @@ files:
185
185
  - html/table_of_contents.html
186
186
  - lib/simple_bioc.rb
187
187
  - lib/simple_bioc/annotation.rb
188
- - lib/simple_bioc/bioc_merger.rb
189
188
  - lib/simple_bioc/bioc_reader.rb
190
189
  - lib/simple_bioc/bioc_writer.rb
191
190
  - lib/simple_bioc/collection.rb
192
191
  - lib/simple_bioc/document.rb
193
192
  - lib/simple_bioc/location.rb
194
- - lib/simple_bioc/location_adjuster.rb
195
193
  - lib/simple_bioc/node.rb
196
194
  - lib/simple_bioc/node_base.rb
197
195
  - lib/simple_bioc/passage.rb
@@ -1,229 +0,0 @@
1
- require 'nokogiri'
2
-
3
- Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
4
-
5
- module BioCMerger
6
- module_function
7
-
8
- def merge(dest_collection, src_collection)
9
- errors = []
10
- warnings = []
11
- id_map = {}
12
-
13
- if dest_collection.documents.size != 1 || src_collection.documents.size != 1
14
- warnings << 'Only the first documents will be merged'
15
- end
16
-
17
- doc_d = dest_collection.documents[0]
18
- doc_s = src_collection.documents[0]
19
-
20
- copy_infons(dest_collection, src_collection)
21
- dest_collection.source = src_collection.source if dest_collection.source.nil? || dest_collection.source.empty?
22
- dest_collection.date = src_collection.date if dest_collection.date.nil? || dest_collection.date.empty?
23
- dest_collection.key = src_collection.key if dest_collection.key.nil? || dest_collection.key.empty?
24
-
25
- copy_infons(doc_d, doc_s)
26
- copy_relations(doc_d, doc_d, doc_s, id_map)
27
-
28
- if doc_d.passages.size != doc_s.passages.size
29
- warnings << 'Passages will not be merged because the numbers of passages in documents are different'
30
- end
31
-
32
- doc_d.passages.each_with_index do |p_d, index|
33
- p_s = doc_s.passages[index]
34
- if p_d.nil? || p_s.nil?
35
- warnings << 'The number of sentences in pages should be the same'
36
- elsif blank?(p_d.text) && blank?(p_s.text) && p_d.sentences.size != p_s.sentences.size
37
- warnings << 'The number of sentences in pages should be the same'
38
- end
39
- end
40
-
41
- doc_d.passages.each_with_index do |p_d, index|
42
- p_s = doc_s.passages[index]
43
- next if p_d.nil? || p_s.nil?
44
- copy_relations(doc_d, p_d, p_s, id_map)
45
- if p_d.sentences.size == p_s.sentences.size
46
- p_d.sentences.each_with_index do |s_d, index|
47
- s_s = p_s.sentences[index]
48
- copy_infons(s_d, s_s)
49
- copy_text(s_d, s_s)
50
- copy_relations(doc_d, s_d, s_s, id_map)
51
- copy_annotations(doc_d, s_d, s_s, id_map)
52
- s_d.adjust_annotation_offsets
53
- end
54
- elsif p_d.sentences.size == 0
55
- p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
56
- p_s.sentences.each do |s|
57
- copy_relations(doc_d, p_d, s, id_map)
58
- copy_annotations(doc_d, p_d, s, id_map)
59
- end
60
- elsif p_s.sentences.size == 0
61
- if p_d.sentences.size > 0
62
- # dest has sentences, but src has only passages.
63
- p_d.text = p_d.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
64
- p_d.sentences.each do |s|
65
- s.annotations.each do |a|
66
- a.clear_sentence
67
- p_d.annotations << a
68
- end
69
- s.relations.each do |r|
70
- r.clear_sentence
71
- p_d.relations << r
72
- end
73
- end
74
- p_d.sentences.clear
75
- else
76
- copy_text(p_d, p_s)
77
- end
78
- end
79
- copy_annotations(doc_d, p_d, p_s, id_map)
80
- p_d.adjust_annotation_offsets
81
- end
82
- puts warnings
83
- end
84
-
85
- def adjust_relation_refids(doc, id_map)
86
- adjust_relation_refid(doc, id_map)
87
- doc.passages.each do |p|
88
- adjust_relation_refid(p, id_map)
89
- p.sentences.each do |s|
90
- adjust_relation_refid(s, id_map)
91
- end
92
- end
93
- end
94
-
95
- def adjust_relation_refid(obj, id_map)
96
- return if obj.nil?
97
- obj.relations.each do |r|
98
- next if r.original.nil?
99
- r.nodes.each do |n|
100
- new_id = id_map[n.refid]
101
- n.refid = new_id unless new_id.nil?
102
- n.adjust_ref
103
- end
104
- end
105
- end
106
-
107
- def copy_relations(doc, dest, src, id_map)
108
- return if src.nil?
109
- src.relations.each do |r|
110
- copy_relation(doc, dest, r, id_map)
111
- end
112
- end
113
-
114
- def copy_annotations(doc, dest, src, id_map)
115
- return if src.nil?
116
- src.annotations.each do |a|
117
- copy_annotation(doc, dest, a, id_map)
118
- end
119
- end
120
- def copy_relation(doc, dest, relation, id_map)
121
- new_r = nil
122
- need_add = true
123
- dest.relations.each do |r|
124
- if r.id == relation.id
125
- new_r = r
126
- need_add = false
127
- break
128
- end
129
- end
130
- if new_r.nil?
131
- new_r = SimpleBioC::Relation.new(dest)
132
- new_r.id = choose_id(doc, relation.id, id_map)
133
- new_r.original = relation
134
- end
135
-
136
- relation.nodes.each do |n|
137
- found = false
138
- new_r.nodes.each do |old_n|
139
- if n.refid == old_n.refid && n.role == old_n.role
140
- found = true
141
- break
142
- end
143
- end
144
- unless found
145
- node = SimpleBioC::Node.new(new_r)
146
- node.refid = n.refid
147
- node.role = n.role
148
- new_r.nodes << node
149
- end
150
- end
151
- copy_infons(new_r, relation)
152
- if need_add
153
- dest.relations << new_r
154
- end
155
- end
156
-
157
- def copy_annotation(doc, dest, annotation, id_map)
158
- new_a = nil
159
- need_add = true
160
- dest.annotations.each do |a|
161
- if a.id == annotation.id && a.text == annotation.text
162
- new_a = a
163
- need_add = false
164
- break
165
- end
166
- end
167
- if new_a.nil?
168
- new_a = SimpleBioC::Annotation.new(dest)
169
- new_a.id = choose_id(doc, annotation.id, id_map)
170
- new_a.text = annotation.text
171
- new_a.locations = []
172
- end
173
-
174
- annotation.locations.each do |l|
175
- found = false
176
- new_a.locations.each do |old_l|
177
- if l.offset == old_l.offset && l.length == old_l.length
178
- found = true
179
- break
180
- end
181
- end
182
- unless found
183
- new_l = SimpleBioC::Location.new(new_a)
184
- new_l.offset = l.offset
185
- new_l.length = l.length
186
- new_a.locations << new_l
187
- end
188
- end
189
- copy_infons(new_a, annotation)
190
- if need_add
191
- dest.annotations << new_a
192
- end
193
- end
194
-
195
- def choose_id(doc, id, id_map)
196
- new_id = id || "id"
197
- node = doc.find_node(new_id)
198
-
199
- until node.nil? do
200
- new_id = new_id + "_c"
201
- node = doc.find_node(new_id)
202
- end
203
-
204
- if new_id != id
205
- id_map[id] = new_id
206
- end
207
- return new_id
208
- end
209
-
210
- def copy_text(dest, src)
211
- if blank?(dest.text) && !blank?(src.text)
212
- dest.text = src.text
213
- end
214
- end
215
-
216
- def blank?(text)
217
- return text.nil? || text.empty?
218
- end
219
-
220
- def copy_infons(dest, src)
221
- src.infons.each do |k, v|
222
- if dest.infons[k].nil?
223
- dest.infons[k] = v
224
- elsif dest.infons[k] != v
225
-
226
- end
227
- end
228
- end
229
- end
@@ -1,45 +0,0 @@
1
- module SimpleBioC
2
- module LocationAdjuster
3
- def adjust_annotation_offsets
4
- obj = self
5
- return if obj.nil? || obj.annotations.nil?
6
- obj.annotations.each do |a|
7
- positions = find_all_locations(obj, a.text)
8
- next a.locations.nil?
9
- a.locations.each do |l|
10
- next if l.nil? || l == false
11
- # l.original_offset = l.offset.to_i if l.original_offset.nil?
12
- l.offset = choose_offset_candidate(l.offset, positions)
13
- end
14
- end
15
- end
16
-
17
- module_function
18
-
19
- def find_all_locations(obj, text)
20
- positions = []
21
- return positions if obj.nil? || obj.text.nil?
22
- pos = obj.text.index(text)
23
- until pos.nil?
24
- positions << (pos + obj.offset)
25
- pos = obj.text.index(text, pos + 1)
26
- end
27
- return positions
28
- end
29
-
30
- def choose_offset_candidate(offset, positions)
31
- return offset if positions.nil?
32
- min_diff = 99999
33
- offset = offset.to_i
34
- ret = offset
35
- positions.each do |p|
36
- diff = (offset - p).abs
37
- if diff < min_diff
38
- ret = p
39
- min_diff = diff
40
- end
41
- end
42
- return ret
43
- end
44
- end
45
- end