simple_bioc 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9861d717fec1b45ab297b2ed93275e77be0c34ee
4
- data.tar.gz: 974019424b72ab4bf3bd87d9444c6f2b04b009b4
3
+ metadata.gz: 344d68b7d1e117a05da96939c1ace1ff10cb630f
4
+ data.tar.gz: 549cfc037ec425cfa53815238aab5628044346c6
5
5
  SHA512:
6
- metadata.gz: 32cf4b3b3f4110098dc780abc434a3f9625e68aa5f98b198537e4672febd6700e215b8cbec7e7c67c7f3c86b4752ac191b88aedc3a25ca50ae2b38102bcc374e
7
- data.tar.gz: 8a982d871c4660d6ceb06f20ce54f96987048b9b7bbc4dc24bb43e9604817b888672f060f9cf2c91308a10646c99350c63008b1c4ae9ec046d6e3eb6707e1714
6
+ metadata.gz: e19302d994c531a1fe9ac2407a48c4517fbef8a6f0671bbc82a2e06e381613d0d48205b705ca372ddd9d8f63282b1845ac5775aec7bf603ed9ddc87fd3a599e9
7
+ data.tar.gz: fa6a72ce6773ee3ac3726be469e3bfa0b3fc808827f4da62926fe7fca7da67eb962fbf3ee4863a456ffc063e32dfcb77c9489efa0d4d5c77e47110bee34224f0
@@ -84,7 +84,6 @@ module BioCReader
84
84
  read_recursive(xml, passage, "sentence")
85
85
  read_recursive(xml, passage, "annotation")
86
86
  read_recursive(xml, passage, "relation")
87
- passage.adjust_annotation_offsets
88
87
  true
89
88
  end
90
89
 
@@ -94,7 +93,6 @@ module BioCReader
94
93
  read_infon(xml, sentence)
95
94
  read_recursive(xml, sentence, "annotation")
96
95
  read_recursive(xml, sentence, "relation")
97
- sentence.adjust_annotation_offsets
98
96
  true
99
97
  end
100
98
 
@@ -1,8 +1,5 @@
1
- require 'simple_bioc/location_adjuster'
2
1
  module SimpleBioC
3
2
  class Passage
4
- include LocationAdjuster
5
-
6
3
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
7
4
  attr_reader :document
8
5
 
@@ -1,8 +1,5 @@
1
- require 'simple_bioc/location_adjuster'
2
1
  module SimpleBioC
3
2
  class Sentence
4
- include LocationAdjuster
5
-
6
3
  attr_accessor :offset, :text, :infons, :annotations, :relations
7
4
  attr_reader :passage
8
5
 
@@ -1,3 +1,3 @@
1
1
  module SimpleBioC
2
- VERSION = "0.0.20"
2
+ VERSION = "0.0.21"
3
3
  end
data/lib/simple_bioc.rb CHANGED
@@ -42,10 +42,6 @@ module SimpleBioC
42
42
  BioCReader.read_from_file_or_string(file, options)
43
43
  end
44
44
 
45
- def merge(dest_bioc, src_bioc)
46
- return BioCMerger.merge(dest_bioc, src_bioc)
47
- end
48
-
49
45
  # parse a BioC XML string and convert it into a collection instance
50
46
  #
51
47
  # ==== Arguments
@@ -11,62 +11,10 @@ describe "File Check" do
11
11
  end
12
12
  end
13
13
 
14
- it "should merge documents successfully" do
15
- col1 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
16
- col2 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
17
- col3 = SimpleBioC.from_xml("./xml/merge/9864355_2.xml")
18
- col4 = SimpleBioC.from_xml("./xml/merge/9864355_3.xml")
19
-
20
- SimpleBioC.merge(col1, col2)
21
- SimpleBioC.merge(col1, col3)
22
- SimpleBioC.merge(col1, col4)
23
- output = SimpleBioC.to_xml(col1)
24
- File.write("./xml/merge/output.xml", output)
25
- puts "merge1"
26
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
27
- end
28
-
29
- it "should merge same documents successfully" do
30
- col1 = SimpleBioC.from_xml("./xml/10330397_gene.xml")
31
- col2 = SimpleBioC.from_xml("./xml/10330397_ppimention.xml")
32
-
33
- SimpleBioC.merge(col1, col2)
34
- output = SimpleBioC.to_xml(col1)
35
- File.write("./xml/merge/output_10330397.xml", output)
36
- puts "merge2"
37
- col5 = SimpleBioC.from_xml("./xml/merge/output_10330397.xml")
38
- end
39
-
40
14
  it "should fix location problem" do
41
15
  col1 = SimpleBioC.from_xml("./xml/merge/10366597_error.xml")
42
16
  output = SimpleBioC.to_xml(col1)
43
17
  File.write("./xml/merge/output_10366597.xml", output)
44
18
  col5 = SimpleBioC.from_xml("./xml/merge/output_10366597.xml")
45
19
  end
46
-
47
-
48
- it "should merge documents successfully with different order" do
49
- col4 = SimpleBioC.from_xml("./xml/merge/9864355.xml")
50
- col3 = SimpleBioC.from_xml("./xml/merge/9864355_1.xml")
51
- col1 = SimpleBioC.from_xml("./xml/merge/9864355_2.xml")
52
- col2 = SimpleBioC.from_xml("./xml/merge/9864355_3.xml")
53
-
54
- SimpleBioC.merge(col1, col2)
55
- SimpleBioC.merge(col1, col3)
56
- SimpleBioC.merge(col1, col4)
57
- output = SimpleBioC.to_xml(col1)
58
- File.write("./xml/merge/output.xml", output)
59
- puts "merge12"
60
- col5 = SimpleBioC.from_xml("./xml/merge/output.xml")
61
- end
62
-
63
- it "should load XML files successfully" do
64
- Dir["./xml/*.xml"].each do |file_path|
65
- puts "self-merge #{file_path}"
66
- collection1 = SimpleBioC.from_xml(file_path)
67
- collection2 = SimpleBioC.from_xml(file_path)
68
- SimpleBioC.merge(collection1, collection2)
69
- output = SimpleBioC.to_xml(collection1)
70
- end
71
- end
72
20
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_bioc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.20
4
+ version: 0.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dongseop Kwon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-04 00:00:00.000000000 Z
11
+ date: 2016-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -185,13 +185,11 @@ files:
185
185
  - html/table_of_contents.html
186
186
  - lib/simple_bioc.rb
187
187
  - lib/simple_bioc/annotation.rb
188
- - lib/simple_bioc/bioc_merger.rb
189
188
  - lib/simple_bioc/bioc_reader.rb
190
189
  - lib/simple_bioc/bioc_writer.rb
191
190
  - lib/simple_bioc/collection.rb
192
191
  - lib/simple_bioc/document.rb
193
192
  - lib/simple_bioc/location.rb
194
- - lib/simple_bioc/location_adjuster.rb
195
193
  - lib/simple_bioc/node.rb
196
194
  - lib/simple_bioc/node_base.rb
197
195
  - lib/simple_bioc/passage.rb
@@ -1,229 +0,0 @@
1
- require 'nokogiri'
2
-
3
- Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
4
-
5
- module BioCMerger
6
- module_function
7
-
8
- def merge(dest_collection, src_collection)
9
- errors = []
10
- warnings = []
11
- id_map = {}
12
-
13
- if dest_collection.documents.size != 1 || src_collection.documents.size != 1
14
- warnings << 'Only the first documents will be merged'
15
- end
16
-
17
- doc_d = dest_collection.documents[0]
18
- doc_s = src_collection.documents[0]
19
-
20
- copy_infons(dest_collection, src_collection)
21
- dest_collection.source = src_collection.source if dest_collection.source.nil? || dest_collection.source.empty?
22
- dest_collection.date = src_collection.date if dest_collection.date.nil? || dest_collection.date.empty?
23
- dest_collection.key = src_collection.key if dest_collection.key.nil? || dest_collection.key.empty?
24
-
25
- copy_infons(doc_d, doc_s)
26
- copy_relations(doc_d, doc_d, doc_s, id_map)
27
-
28
- if doc_d.passages.size != doc_s.passages.size
29
- warnings << 'Passages will not be merged because the numbers of passages in documents are different'
30
- end
31
-
32
- doc_d.passages.each_with_index do |p_d, index|
33
- p_s = doc_s.passages[index]
34
- if p_d.nil? || p_s.nil?
35
- warnings << 'The number of sentences in pages should be the same'
36
- elsif blank?(p_d.text) && blank?(p_s.text) && p_d.sentences.size != p_s.sentences.size
37
- warnings << 'The number of sentences in pages should be the same'
38
- end
39
- end
40
-
41
- doc_d.passages.each_with_index do |p_d, index|
42
- p_s = doc_s.passages[index]
43
- next if p_d.nil? || p_s.nil?
44
- copy_relations(doc_d, p_d, p_s, id_map)
45
- if p_d.sentences.size == p_s.sentences.size
46
- p_d.sentences.each_with_index do |s_d, index|
47
- s_s = p_s.sentences[index]
48
- copy_infons(s_d, s_s)
49
- copy_text(s_d, s_s)
50
- copy_relations(doc_d, s_d, s_s, id_map)
51
- copy_annotations(doc_d, s_d, s_s, id_map)
52
- s_d.adjust_annotation_offsets
53
- end
54
- elsif p_d.sentences.size == 0
55
- p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
56
- p_s.sentences.each do |s|
57
- copy_relations(doc_d, p_d, s, id_map)
58
- copy_annotations(doc_d, p_d, s, id_map)
59
- end
60
- elsif p_s.sentences.size == 0
61
- if p_d.sentences.size > 0
62
- # dest has sentences, but src has only passages.
63
- p_d.text = p_d.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
64
- p_d.sentences.each do |s|
65
- s.annotations.each do |a|
66
- a.clear_sentence
67
- p_d.annotations << a
68
- end
69
- s.relations.each do |r|
70
- r.clear_sentence
71
- p_d.relations << r
72
- end
73
- end
74
- p_d.sentences.clear
75
- else
76
- copy_text(p_d, p_s)
77
- end
78
- end
79
- copy_annotations(doc_d, p_d, p_s, id_map)
80
- p_d.adjust_annotation_offsets
81
- end
82
- puts warnings
83
- end
84
-
85
- def adjust_relation_refids(doc, id_map)
86
- adjust_relation_refid(doc, id_map)
87
- doc.passages.each do |p|
88
- adjust_relation_refid(p, id_map)
89
- p.sentences.each do |s|
90
- adjust_relation_refid(s, id_map)
91
- end
92
- end
93
- end
94
-
95
- def adjust_relation_refid(obj, id_map)
96
- return if obj.nil?
97
- obj.relations.each do |r|
98
- next if r.original.nil?
99
- r.nodes.each do |n|
100
- new_id = id_map[n.refid]
101
- n.refid = new_id unless new_id.nil?
102
- n.adjust_ref
103
- end
104
- end
105
- end
106
-
107
- def copy_relations(doc, dest, src, id_map)
108
- return if src.nil?
109
- src.relations.each do |r|
110
- copy_relation(doc, dest, r, id_map)
111
- end
112
- end
113
-
114
- def copy_annotations(doc, dest, src, id_map)
115
- return if src.nil?
116
- src.annotations.each do |a|
117
- copy_annotation(doc, dest, a, id_map)
118
- end
119
- end
120
- def copy_relation(doc, dest, relation, id_map)
121
- new_r = nil
122
- need_add = true
123
- dest.relations.each do |r|
124
- if r.id == relation.id
125
- new_r = r
126
- need_add = false
127
- break
128
- end
129
- end
130
- if new_r.nil?
131
- new_r = SimpleBioC::Relation.new(dest)
132
- new_r.id = choose_id(doc, relation.id, id_map)
133
- new_r.original = relation
134
- end
135
-
136
- relation.nodes.each do |n|
137
- found = false
138
- new_r.nodes.each do |old_n|
139
- if n.refid == old_n.refid && n.role == old_n.role
140
- found = true
141
- break
142
- end
143
- end
144
- unless found
145
- node = SimpleBioC::Node.new(new_r)
146
- node.refid = n.refid
147
- node.role = n.role
148
- new_r.nodes << node
149
- end
150
- end
151
- copy_infons(new_r, relation)
152
- if need_add
153
- dest.relations << new_r
154
- end
155
- end
156
-
157
- def copy_annotation(doc, dest, annotation, id_map)
158
- new_a = nil
159
- need_add = true
160
- dest.annotations.each do |a|
161
- if a.id == annotation.id && a.text == annotation.text
162
- new_a = a
163
- need_add = false
164
- break
165
- end
166
- end
167
- if new_a.nil?
168
- new_a = SimpleBioC::Annotation.new(dest)
169
- new_a.id = choose_id(doc, annotation.id, id_map)
170
- new_a.text = annotation.text
171
- new_a.locations = []
172
- end
173
-
174
- annotation.locations.each do |l|
175
- found = false
176
- new_a.locations.each do |old_l|
177
- if l.offset == old_l.offset && l.length == old_l.length
178
- found = true
179
- break
180
- end
181
- end
182
- unless found
183
- new_l = SimpleBioC::Location.new(new_a)
184
- new_l.offset = l.offset
185
- new_l.length = l.length
186
- new_a.locations << new_l
187
- end
188
- end
189
- copy_infons(new_a, annotation)
190
- if need_add
191
- dest.annotations << new_a
192
- end
193
- end
194
-
195
- def choose_id(doc, id, id_map)
196
- new_id = id || "id"
197
- node = doc.find_node(new_id)
198
-
199
- until node.nil? do
200
- new_id = new_id + "_c"
201
- node = doc.find_node(new_id)
202
- end
203
-
204
- if new_id != id
205
- id_map[id] = new_id
206
- end
207
- return new_id
208
- end
209
-
210
- def copy_text(dest, src)
211
- if blank?(dest.text) && !blank?(src.text)
212
- dest.text = src.text
213
- end
214
- end
215
-
216
- def blank?(text)
217
- return text.nil? || text.empty?
218
- end
219
-
220
- def copy_infons(dest, src)
221
- src.infons.each do |k, v|
222
- if dest.infons[k].nil?
223
- dest.infons[k] = v
224
- elsif dest.infons[k] != v
225
-
226
- end
227
- end
228
- end
229
- end
@@ -1,45 +0,0 @@
1
- module SimpleBioC
2
- module LocationAdjuster
3
- def adjust_annotation_offsets
4
- obj = self
5
- return if obj.nil? || obj.annotations.nil?
6
- obj.annotations.each do |a|
7
- positions = find_all_locations(obj, a.text)
8
- next a.locations.nil?
9
- a.locations.each do |l|
10
- next if l.nil? || l == false
11
- # l.original_offset = l.offset.to_i if l.original_offset.nil?
12
- l.offset = choose_offset_candidate(l.offset, positions)
13
- end
14
- end
15
- end
16
-
17
- module_function
18
-
19
- def find_all_locations(obj, text)
20
- positions = []
21
- return positions if obj.nil? || obj.text.nil?
22
- pos = obj.text.index(text)
23
- until pos.nil?
24
- positions << (pos + obj.offset)
25
- pos = obj.text.index(text, pos + 1)
26
- end
27
- return positions
28
- end
29
-
30
- def choose_offset_candidate(offset, positions)
31
- return offset if positions.nil?
32
- min_diff = 99999
33
- offset = offset.to_i
34
- ret = offset
35
- positions.each do |p|
36
- diff = (offset - p).abs
37
- if diff < min_diff
38
- ret = p
39
- min_diff = diff
40
- end
41
- end
42
- return ret
43
- end
44
- end
45
- end