simple_bioc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +8 -0
- data/lib/simple_bioc/annotation.rb +11 -0
- data/lib/simple_bioc/bioc_reader.rb +102 -0
- data/lib/simple_bioc/bioc_writer.rb +93 -0
- data/lib/simple_bioc/collection.rb +15 -0
- data/lib/simple_bioc/document.rb +29 -0
- data/lib/simple_bioc/location.rb +10 -0
- data/lib/simple_bioc/node.rb +12 -0
- data/lib/simple_bioc/node_base.rb +14 -0
- data/lib/simple_bioc/passage.rb +29 -0
- data/lib/simple_bioc/relation.rb +14 -0
- data/lib/simple_bioc/sentence.rb +20 -0
- data/lib/simple_bioc/version.rb +3 -0
- data/lib/simple_bioc.rb +14 -0
- data/simple_bioc.gemspec +27 -0
- data/spec/simple_bioc_spec.rb +14 -0
- data/xml/BioC.dtd +146 -0
- data/xml/PMID-8557975-simplified-sentences-tokens.xml +492 -0
- data/xml/PMID-8557975-simplified-sentences.xml +49 -0
- data/xml/abbr.key +71 -0
- data/xml/abbr.xml +1 -0
- data/xml/ascii.key +29 -0
- data/xml/ascii.xml +3 -0
- data/xml/everything-sentence.xml +1 -0
- data/xml/everything.key +8 -0
- data/xml/everything.xml +1 -0
- data/xml/lemma.key +51 -0
- data/xml/lemma.xml +1 -0
- data/xml/pos.key +49 -0
- data/xml/pos.xml +1 -0
- data/xml/sentence.key +36 -0
- data/xml/sentence.xml +1 -0
- metadata +153 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ac827766c1ee157f8eb3a836754da23968863299
|
4
|
+
data.tar.gz: 89bd14e8bba58e50e45d68221d6cf7b915512c96
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bb1560756684d4f65393effcb32237304480718f24a14f871cce67f6cd302f5ee29911b5fcca26cd26f9b61873822cfdbb5405b8b095c0e5dcc0ab560f29cde9
|
7
|
+
data.tar.gz: 5e54ab65f41d74e85f48647f70d241edc2ede844cd2c32c21ebc3096a49cb8ed914304f8c7de004406476728d6d388d21bf6b44efbc454079a361c8deaab94fc
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Dongseop Kwon
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Dongseop Kwon
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# SimpleBioc
|
2
|
+
|
3
|
+
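Simple BioC parser/builder for Ruby. BioC is "A Minimalist Approach to Interoperability for Biomedical Text Processing" (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html).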
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'simple_bioc'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install simple_bioc
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
3
|
+
|
4
|
+
# Reads a BioC XML file into the library's object model. Every +read_*+
# helper takes the XML element to read from and the object to fill in.
module BioCReader
  module_function

  # Parses the file at +path+ (Nokogiri options noent, strict and noblanks
  # enabled) and returns the populated Collection.
  # Raises RuntimeError ('Wrong format') when no <collection> element exists.
  def read(path)
    File.open(path) do |io|
      parsed = Nokogiri::XML(io) { |config| config.noent.strict.noblanks }
      root = parsed.at_xpath("//collection")
      raise 'Wrong format' if root.nil?

      collection = Collection.new
      read_collection(root, collection)
      collection
    end
  end

  # Returns the content of the first node matching +name+ under +xml+,
  # or nil when there is no such node.
  def read_text(xml, name)
    element = xml.at_xpath(name)
    element ? element.content : element
  end

  # Same lookup as read_text, with the content converted through String#to_i.
  # Returns nil when the node is missing.
  def read_int(xml, name)
    raw = read_text(xml, name)
    raw ? raw.to_i : raw
  end

  # Copies every <infon> child of +xml+ into obj.infons, keyed by the
  # element's "key" attribute.
  def read_infon(xml, obj)
    xml.xpath("infon").each do |infon|
      obj.infons[infon["key"]] = infon.content
    end
  end

  # For each child element called +name+: instantiates the class of the same
  # (capitalized) name with +obj+ as parent, fills it through the matching
  # read_<name> method and appends it to obj's @<name>s instance variable.
  def read_recursive(xml, obj, name)
    klass = Module.const_get(name.capitalize)
    reader = :"read_#{name}"
    children = :"@#{name}s"
    xml.xpath(name).each do |child|
      item = klass.new(obj)
      send(reader, child, item)
      obj.instance_variable_get(children) << item
    end
  end

  # Fills source, date, key, infons and documents of +collection+.
  def read_collection(xml, collection)
    collection.source = read_text(xml, 'source')
    collection.date = read_text(xml, 'date')
    collection.key = read_text(xml, 'key')
    read_infon(xml, collection)
    read_recursive(xml, collection, 'document')
  end

  # Fills id, infons, passages and relations of +document+, then calls
  # document.adjust_ref once everything has been read.
  def read_document(xml, document)
    document.id = read_text(xml, 'id')
    read_infon(xml, document)
    read_recursive(xml, document, 'passage')
    read_recursive(xml, document, 'relation')
    document.adjust_ref
  end

  # Fills text, integer offset, infons, sentences, annotations and relations.
  def read_passage(xml, passage)
    passage.text = read_text(xml, 'text')
    passage.offset = read_int(xml, 'offset')
    read_infon(xml, passage)
    read_recursive(xml, passage, 'sentence')
    read_recursive(xml, passage, 'annotation')
    read_recursive(xml, passage, 'relation')
  end

  # Fills text, integer offset, infons, annotations and relations.
  def read_sentence(xml, sentence)
    sentence.text = read_text(xml, 'text')
    sentence.offset = read_int(xml, 'offset')
    read_infon(xml, sentence)
    read_recursive(xml, sentence, 'annotation')
    read_recursive(xml, sentence, 'relation')
  end

  # Fills id (from the "id" attribute), text, infons and locations.
  def read_annotation(xml, annotation)
    annotation.id = xml['id']
    annotation.text = read_text(xml, 'text')
    read_infon(xml, annotation)
    read_recursive(xml, annotation, 'location')
  end

  # Fills id (from the "id" attribute), infons and nodes.
  def read_relation(xml, relation)
    relation.id = xml['id']
    read_infon(xml, relation)
    read_recursive(xml, relation, 'node')
  end

  # Copies the "offset" and "length" attributes as-is; unlike passage and
  # sentence offsets they are not converted to integers.
  def read_location(xml, location)
    location.offset = xml['offset']
    location.length = xml['length']
  end

  # Copies the "refid" and "role" attributes.
  def read_node(xml, node)
    node.refid = xml['refid']
    node.role = xml['role']
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
|
3
|
+
|
4
|
+
# Serializes the BioC object model (a Collection and everything below it)
# to XML through a Nokogiri::XML::Builder. Every +write_*+ helper takes the
# builder and the object to serialize.
module BioCWriter
  module_function

  # Returns the XML document for +collection+ as a String (UTF-8 encoding).
  def write(collection)
    builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
      write_collection(xml, collection)
    end
    builder.to_xml
  end

  # Emits one <infon key="..."> element per entry of obj.infons.
  def write_infon(xml, obj)
    obj.infons.each do |k, v|
      xml.infon(:key => k) {
        xml.text v
      }
    end
  end

  # Emits <collection> with source, date, key, infons and documents.
  def write_collection(xml, collection)
    xml.collection {
      xml.source collection.source
      xml.date collection.date
      xml.key collection.key
      write_infon(xml, collection)
      collection.documents.each { |d| write_document(xml, d) }
    }
  end

  # Emits <document> with id, infons, passages and relations.
  def write_document(xml, document)
    xml.document {
      xml.id_ document.id
      write_infon(xml, document)
      document.passages.each { |p| write_passage(xml, p) }
      document.relations.each { |r| write_relation(xml, r) }
    }
  end

  # Emits <passage>; the <text> child is omitted when passage.text is nil.
  def write_passage(xml, passage)
    xml.passage {
      write_infon(xml, passage)
      xml.offset passage.offset
      xml.text_ passage.text unless passage.text.nil?
      passage.sentences.each { |s| write_sentence(xml, s) }
      passage.annotations.each { |a| write_annotation(xml, a) }
      passage.relations.each { |r| write_relation(xml, r) }
    }
  end

  # Emits <sentence>; the <text> child is omitted when sentence.text is nil.
  def write_sentence(xml, sentence)
    xml.sentence {
      write_infon(xml, sentence)
      xml.offset sentence.offset
      xml.text_ sentence.text unless sentence.text.nil?
      sentence.annotations.each { |a| write_annotation(xml, a) }
      sentence.relations.each { |r| write_relation(xml, r) }
    }
  end

  # Emits <annotation>, with an id attribute only when annotation.id is set.
  def write_annotation(xml, annotation)
    attribute = annotation.id.nil? ? nil : { :id => annotation.id }
    xml.annotation(attribute) {
      write_infon(xml, annotation)
      # The BioC DTD declares annotation as (infon*, location*, text), so the
      # locations have to be written before the text element.
      annotation.locations.each { |l| write_location(xml, l) }
      xml.text_ annotation.text
    }
  end

  # Emits <relation>, with an id attribute only when relation.id is set.
  def write_relation(xml, relation)
    attribute = relation.id.nil? ? nil : { :id => relation.id }
    xml.relation(attribute) {
      write_infon(xml, relation)
      relation.nodes.each { |n| write_node(xml, n) }
    }
  end

  # Emits an empty <location> carrying offset and length attributes.
  def write_location(xml, location)
    xml.location(:offset => location.offset, :length => location.length)
  end

  # Emits an empty <node> carrying refid and role attributes.
  def write_node(xml, node)
    xml.node_(:refid => node.refid, :role => node.role)
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# A document of a collection: an id, infons, passages and document-level
# relations, plus a reference back to the owning collection.
class Document
  attr_accessor :id, :infons, :passages, :relations
  attr_reader :collection

  # +parent+ is stored as the owning collection.
  def initialize(parent)
    @infons = {}
    @passages = []
    @relations = []
    @collection = parent
  end

  # Returns the first object whose id equals +id+, looking at the
  # document-level relations first and then asking each passage in order.
  # Returns nil when nothing matches.
  def find_node(id)
    match = relations.find { |relation| relation.id == id }
    return match unless match.nil?

    passages.each do |passage|
      match = passage.find_node(id)
      return match unless match.nil?
    end
    nil
  end

  # Calls adjust_ref on every relation reachable from this document.
  def adjust_ref
    each_relation { |relation| relation.adjust_ref }
  end

  # Yields the document-level relations, then the relations each passage
  # yields from its own each_relation. Returns an Enumerator when called
  # without a block.
  def each_relation
    return enum_for(:each_relation) unless block_given?

    relations.each { |relation| yield relation }
    passages.each { |passage| passage.each_relation { |relation| yield relation } }
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Common state for objects that can hang off a document, a passage or a
# sentence: an id, infons and references to the enclosing containers.
class NodeBase
  attr_accessor :id, :infons
  attr_reader :document, :passage, :sentence

  # Records +parent+ under the reader matching its class and derives the
  # containers above it: a sentence supplies its passage, and a passage
  # supplies its document.
  def initialize(parent)
    @infons = {}

    case parent
    when Sentence
      @sentence = parent
      @passage = parent.passage
    when Passage
      @passage = parent
    when Document
      @document = parent
    end

    @document = @passage.document unless @passage.nil?
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# A passage of a document: offset, text, infons, sentences, annotations and
# relations, plus a reference back to the owning document.
class Passage
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
  attr_reader :document

  # +parent+ is stored as the owning document.
  def initialize(parent)
    @infons = {}
    @sentences = []
    @annotations = []
    @relations = []
    @document = parent
  end

  # Returns "<offset>:<text>".
  def to_s
    "#{offset}:#{text}"
  end

  # Returns the first relation, then annotation, of this passage whose id
  # equals +id+; otherwise asks each sentence in order. Returns nil when
  # nothing matches.
  def find_node(id)
    match = relations.find { |node| node.id == id } ||
            annotations.find { |node| node.id == id }
    return match unless match.nil?

    sentences.each do |sentence|
      match = sentence.find_node(id)
      return match unless match.nil?
    end
    nil
  end

  # Yields the passage-level relations, then the relations each sentence
  # yields from its own each_relation. Returns an Enumerator when called
  # without a block.
  def each_relation
    return enum_for(:each_relation) unless block_given?

    relations.each { |relation| yield relation }
    sentences.each { |sentence| sentence.each_relation { |relation| yield relation } }
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# A sentence of a passage: offset, text, infons, annotations and relations,
# plus a reference back to the owning passage.
class Sentence
  attr_accessor :offset, :text, :infons, :annotations, :relations
  attr_reader :passage

  # +parent+ is stored as the owning passage.
  def initialize(parent)
    @infons = {}
    @annotations = []
    @relations = []
    @passage = parent
  end

  # Returns the first relation, then annotation, whose id equals +id+,
  # or nil when nothing matches.
  def find_node(id)
    relations.find { |node| node.id == id } ||
      annotations.find { |node| node.id == id }
  end

  # Yields each relation of this sentence. Returns an Enumerator when called
  # without a block.
  def each_relation
    return enum_for(:each_relation) unless block_given?

    relations.each { |relation| yield relation }
  end
end
|
data/lib/simple_bioc.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "simple_bioc/version"
|
2
|
+
require "simple_bioc/bioc_reader"
|
3
|
+
require "simple_bioc/bioc_writer"
|
4
|
+
|
5
|
+
# Entry points of the library; both calls delegate to the reader and
# writer modules.
module SimpleBioC
  # Reads the BioC XML file at +file_path+ via BioCReader.read and returns
  # its result.
  def from_xml(file_path)
    BioCReader.read(file_path)
  end

  # Serializes +collection+ via BioCWriter.write and returns its result.
  def to_xml(collection)
    BioCWriter.write(collection)
  end

  module_function :from_xml, :to_xml
end
|
data/simple_bioc.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'simple_bioc/version'
|
5
|
+
|
6
|
+
# Gem metadata for simple_bioc.
Gem::Specification.new do |spec|
  spec.name          = "simple_bioc"
  # NOTE(review): lib/simple_bioc.rb spells the library module SimpleBioC;
  # confirm that simple_bioc/version really defines SimpleBioc (lowercase c).
  spec.version       = SimpleBioc::VERSION
  spec.authors       = ["Dongseop Kwon"]
  spec.email         = ["dongseop@gmail.com"]
  spec.description   = "Simple BioC parser/builder for ruby. BioC is 'A Minimalist Approach to Interoperability for Biomedical Text Processing' (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html)"
  spec.summary       = "Simple BioC parser/builder for ruby"
  spec.homepage      = "https://github.com/dongseop/simple_bioc"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency("nokogiri", [">= 1.3.2"])

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency("rspec-core", ["~> 2.2"])
  spec.add_development_dependency("test-xml", ["~> 0.1.6"])
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# simple_bioc_spec.rb
|
2
|
+
require 'simple_bioc'
|
3
|
+
require 'test_xml/spec'
|
4
|
+
# Round-trip check: reading a fixture and writing it back must give XML
# equal to the fixture itself.
describe BioCReader do
  # One example per fixture, so a failure names the offending file and the
  # remaining fixtures are still exercised.
  Dir["./xml/*.xml"].each do |file_path|
    it "loads and re-serializes #{file_path} without changing the XML" do
      collection = SimpleBioC.from_xml(file_path)
      output = SimpleBioC.to_xml(collection)
      expected = File.read(file_path)
      expect(output).to equal_xml(expected)
    end
  end
end
|