simple_bioc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ac827766c1ee157f8eb3a836754da23968863299
4
+ data.tar.gz: 89bd14e8bba58e50e45d68221d6cf7b915512c96
5
+ SHA512:
6
+ metadata.gz: bb1560756684d4f65393effcb32237304480718f24a14f871cce67f6cd302f5ee29911b5fcca26cd26f9b61873822cfdbb5405b8b095c0e5dcc0ab560f29cde9
7
+ data.tar.gz: 5e54ab65f41d74e85f48647f70d241edc2ede844cd2c32c21ebc3096a49cb8ed914304f8c7de004406476728d6d388d21bf6b44efbc454079a361c8deaab94fc
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ Gemfile.lock
6
+ InstalledFiles
7
+ coverage
8
+ lib/bundler/man
9
+ pkg
10
+ rdoc
11
+ spec/reports
12
+ test/tmp
13
+ test/version_tmp
14
+ tmp
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in simple_bioc.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Dongseop Kwon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Dongseop Kwon
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # SimpleBioc
2
+
3
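+ Simple BioC parser/builder for Ruby. BioC is 'A Minimalist Approach to Interoperability for Biomedical Text Processing' (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html).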
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'simple_bioc'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install simple_bioc
18
+
19
+ ## Usage
20
+
21
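+ Parse a BioC XML file with `collection = SimpleBioC.from_xml(file_path)` and serialize a collection back to an XML string with `SimpleBioC.to_xml(collection)`.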
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+ require 'rdoc/task'
4
+
5
# Running `rake` with no arguments runs the :spec task.
task default: [:spec]

# Registers RSpec's rake task and switches on verbose output for it.
RSpec::Core::RakeTask.new { |spec_task| spec_task.verbose = true }
@@ -0,0 +1,11 @@
1
+ require 'simple_bioc/node_base'
2
+
3
# An annotation node: a NodeBase that additionally carries a text value and
# a list of locations.
class Annotation < NodeBase
  attr_accessor :text, :locations

  # parent is forwarded unchanged to NodeBase#initialize; the locations list
  # starts out empty.
  def initialize(parent)
    super
    @locations = []
  end
end
@@ -0,0 +1,102 @@
1
+ require 'nokogiri'
2
+ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
3
+
4
# Builds the object model (Collection, Document, Passage, Sentence,
# Annotation, Relation, Location, Node) from a BioC XML file.
module BioCReader
  module_function

  # Parses the XML file at +path+ and returns the populated Collection.
  #
  # Raises RuntimeError ('Wrong format') when the document has no
  # <collection> element.
  def read(path)
    File.open(path) do |io|
      # NOTE(review): `noent` asks the parser to substitute entities; confirm
      # that is acceptable if files can come from untrusted sources.
      doc = Nokogiri::XML(io) { |config| config.noent.strict.noblanks }
      root = doc.at_xpath("//collection")
      fail 'Wrong format' if root.nil?

      Collection.new.tap { |collection| read_collection(root, collection) }
    end
  end

  # Content of the first node matching +name+ under +xml+, or nil when there
  # is no such node.
  def read_text(xml, name)
    found = xml.at_xpath(name)
    found && found.content
  end

  # Same lookup as read_text, converted with to_i; nil when the node is absent.
  def read_int(xml, name)
    raw = read_text(xml, name)
    raw && raw.to_i
  end

  # Copies every <infon> child of +xml+ into obj.infons, keyed by the
  # element's "key" attribute.
  def read_infon(xml, obj)
    xml.xpath("infon").each do |infon|
      obj.infons[infon["key"]] = infon.content
    end
  end

  # Generic child loader. For each +name+ child of +xml+ it instantiates the
  # class called name.capitalize with +obj+ as parent, fills it through the
  # matching read_<name> method and appends it to obj's @<name>s collection.
  def read_recursive(xml, obj, name)
    child_class = Module.const_get(name.capitalize)
    reader = :"read_#{name}"
    xml.xpath(name).each do |child_xml|
      child = child_class.new(obj)
      send(reader, child_xml, child)
      obj.instance_variable_get(:"@#{name}s") << child
    end
  end

  # Fills source, date, key, infons and documents of +collection+.
  def read_collection(xml, collection)
    collection.source = read_text(xml, "source")
    collection.date = read_text(xml, "date")
    collection.key = read_text(xml, "key")
    read_infon(xml, collection)
    read_recursive(xml, collection, "document")
  end

  # Fills id, infons, passages and relations of +document+, then resolves
  # references via document.adjust_ref once everything has been loaded.
  def read_document(xml, document)
    document.id = read_text(xml, "id")
    read_infon(xml, document)
    read_recursive(xml, document, "passage")
    read_recursive(xml, document, "relation")
    document.adjust_ref
  end

  # Fills text, integer offset, infons, sentences, annotations and relations
  # of +passage+.
  def read_passage(xml, passage)
    passage.text = read_text(xml, "text")
    passage.offset = read_int(xml, "offset")
    read_infon(xml, passage)
    read_recursive(xml, passage, "sentence")
    read_recursive(xml, passage, "annotation")
    read_recursive(xml, passage, "relation")
  end

  # Fills text, integer offset, infons, annotations and relations of
  # +sentence+.
  def read_sentence(xml, sentence)
    sentence.text = read_text(xml, "text")
    sentence.offset = read_int(xml, "offset")
    read_infon(xml, sentence)
    read_recursive(xml, sentence, "annotation")
    read_recursive(xml, sentence, "relation")
  end

  # Fills id (from the "id" attribute), text, infons and locations of
  # +annotation+.
  def read_annotation(xml, annotation)
    annotation.id = xml["id"]
    annotation.text = read_text(xml, "text")
    read_infon(xml, annotation)
    read_recursive(xml, annotation, "location")
  end

  # Fills id (from the "id" attribute), infons and nodes of +relation+.
  def read_relation(xml, relation)
    relation.id = xml["id"]
    read_infon(xml, relation)
    read_recursive(xml, relation, "node")
  end

  # Copies the "offset" and "length" attributes as-is (no integer conversion).
  def read_location(xml, location)
    location.offset = xml["offset"]
    location.length = xml["length"]
  end

  # Copies the "refid" and "role" attributes onto +node+.
  def read_node(xml, node)
    node.refid = xml["refid"]
    node.role = xml["role"]
  end
end
@@ -0,0 +1,93 @@
1
+ require 'nokogiri'
2
+ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
3
+
4
# Serializes the object model (Collection, Document, Passage, ...) to BioC XML
# through a Nokogiri XML builder.
module BioCWriter
  module_function

  # Returns the UTF-8 XML string for +collection+.
  def write(collection)
    builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
      write_collection(xml, collection)
    end
    builder.to_xml
  end

  # Emits one <infon key="..."> element per entry of obj.infons.
  def write_infon(xml, obj)
    obj.infons.each do |key, value|
      xml.infon(:key => key) { xml.text value }
    end
  end

  # Attribute hash for elements with an optional id: empty when the id is
  # nil, so no id attribute is written for such elements.
  def id_attribute(obj)
    obj.id.nil? ? {} : { :id => obj.id }
  end

  # <collection>: source, date, key, infons, then every document.
  def write_collection(xml, collection)
    xml.collection do
      xml.source collection.source
      xml.date collection.date
      xml.key collection.key
      write_infon(xml, collection)
      collection.documents.each { |document| write_document(xml, document) }
    end
  end

  # <document>: id, infons, passages, relations.
  # (`id_` with a trailing underscore is how the builder is asked for an
  # element literally named "id".)
  def write_document(xml, document)
    xml.document do
      xml.id_ document.id
      write_infon(xml, document)
      document.passages.each { |passage| write_passage(xml, passage) }
      document.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # <passage>: infons, offset, optional text, sentences, annotations,
  # relations. The <text> element is skipped when passage.text is nil.
  def write_passage(xml, passage)
    xml.passage do
      write_infon(xml, passage)
      xml.offset passage.offset
      xml.text_ passage.text unless passage.text.nil?
      passage.sentences.each { |sentence| write_sentence(xml, sentence) }
      passage.annotations.each { |annotation| write_annotation(xml, annotation) }
      passage.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # <sentence>: infons, offset, optional text, annotations, relations.
  # The <text> element is skipped when sentence.text is nil.
  def write_sentence(xml, sentence)
    xml.sentence do
      write_infon(xml, sentence)
      xml.offset sentence.offset
      xml.text_ sentence.text unless sentence.text.nil?
      sentence.annotations.each { |annotation| write_annotation(xml, annotation) }
      sentence.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # <annotation>: infons, locations, text.
  # The BioC DTD declares the content model as (infon*, location*, text), so
  # locations must be written before the text element.
  def write_annotation(xml, annotation)
    xml.annotation(id_attribute(annotation)) do
      write_infon(xml, annotation)
      annotation.locations.each { |location| write_location(xml, location) }
      xml.text_ annotation.text
    end
  end

  # <relation>: infons, then nodes.
  def write_relation(xml, relation)
    xml.relation(id_attribute(relation)) do
      write_infon(xml, relation)
      relation.nodes.each { |node| write_node(xml, node) }
    end
  end

  # Empty <location> element carrying offset and length attributes.
  def write_location(xml, location)
    xml.location(:offset => location.offset, :length => location.length)
  end

  # Empty <node> element carrying refid and role attributes.
  def write_node(xml, node)
    xml.node_(:refid => node.refid, :role => node.role)
  end
end
@@ -0,0 +1,15 @@
1
# Root of the object model: holds the documents together with the
# collection-level source, date, key and infons.
class Collection
  attr_accessor :documents, :infons, :source, :date, :key

  # Starts with no documents, no infons and empty source/date/key strings.
  def initialize
    @documents = []
    @infons = {}
    @source = ""
    @date = ""
    @key = ""
  end

  # Serializes this collection by delegating to BioCWriter.write and returns
  # its result. BioCWriter is looked up when the method is called, so the
  # writer must have been loaded by then (requiring "simple_bioc" does that).
  def to_xml
    BioCWriter.write(self)
  end
end
@@ -0,0 +1,29 @@
1
# A document inside a collection: an id, infons, passages and
# document-level relations.
class Document
  attr_accessor :id, :infons, :passages, :relations
  attr_reader :collection

  # parent is stored as the owning collection.
  def initialize(parent)
    @infons = {}
    @passages = []
    @relations = []
    @collection = parent
  end

  # Looks up the object whose id equals +id+: document-level relations are
  # checked first, then each passage is asked in order. Returns nil when
  # nothing matches.
  def find_node(id)
    own = relations.find { |relation| relation.id == id }
    return own if own

    passages.each do |passage|
      hit = passage.find_node(id)
      return hit unless hit.nil?
    end
    nil
  end

  # Calls adjust_ref on every relation reachable from this document.
  def adjust_ref
    each_relation(&:adjust_ref)
  end

  # Yields the document-level relations, then every relation each passage
  # yields from its own each_relation.
  def each_relation
    relations.each { |relation| yield relation }
    passages.each do |passage|
      passage.each_relation { |relation| yield relation }
    end
  end
end
@@ -0,0 +1,10 @@
1
# An offset/length pair attached to an annotation.
class Location
  attr_accessor :offset, :length
  attr_reader :annotation

  # parent is stored as the owning annotation. offset and length stay nil
  # until assigned. (No infons/locations state is kept here: this class
  # exposes neither.)
  def initialize(parent)
    @annotation = parent
  end
end
@@ -0,0 +1,12 @@
1
+ class Node
2
+ attr_accessor :refid, :role
3
+ attr_reader :ref, :relation
4
+
5
+ def initialize(parent)
6
+ @relation = parent
7
+ end
8
+
9
+ def adjust_ref
10
+ @ref = relation.document.find_node(refid)
11
+ end
12
+ end
@@ -0,0 +1,14 @@
1
# Shared base for objects that carry an id and infons and remember where in
# the document / passage / sentence hierarchy they were attached.
class NodeBase
  attr_accessor :id, :infons
  attr_reader :document, :passage, :sentence

  # parent may be a Document, a Passage or a Sentence; any other value leaves
  # all three back-references nil.
  def initialize(parent)
    @infons = {}
    remember_parent(parent)
    inherit_ancestors
  end

  private

  # Stores parent in the back-reference that matches its class.
  def remember_parent(parent)
    @document = parent if parent.is_a?(Document)
    @passage = parent if parent.is_a?(Passage)
    @sentence = parent if parent.is_a?(Sentence)
  end

  # Fills in the levels above the direct parent: a sentence supplies its
  # passage, and a passage supplies its document.
  def inherit_ancestors
    @passage = @sentence.passage if @sentence
    @document = @passage.document if @passage
  end
end
@@ -0,0 +1,29 @@
1
# A passage of a document: offset, optional text, infons, and the sentences,
# annotations and relations found in it.
class Passage
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
  attr_reader :document

  # parent is stored as the owning document.
  def initialize(parent)
    @infons = {}
    @sentences = []
    @annotations = []
    @relations = []
    @document = parent
  end

  # "<offset>:<text>"
  def to_s
    "#{offset}:#{text}"
  end

  # Looks up the object whose id equals +id+: this passage's relations, then
  # its annotations, then each sentence in order. Returns nil when nothing
  # matches.
  def find_node(id)
    direct = (relations + annotations).find { |candidate| candidate.id == id }
    return direct if direct

    sentences.each do |sentence|
      nested = sentence.find_node(id)
      return nested unless nested.nil?
    end
    nil
  end

  # Yields the passage-level relations, then every relation each sentence
  # yields from its own each_relation.
  def each_relation
    relations.each { |relation| yield relation }
    sentences.each do |sentence|
      sentence.each_relation { |relation| yield relation }
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'simple_bioc/node_base'
2
+
3
# A relation node: a NodeBase that additionally owns a list of nodes.
class Relation < NodeBase
  attr_accessor :nodes

  # parent is forwarded unchanged to NodeBase#initialize; the node list
  # starts out empty.
  def initialize(parent)
    super
    @nodes = []
  end

  # Calls adjust_ref on each node of this relation.
  def adjust_ref
    nodes.each(&:adjust_ref)
  end
end
@@ -0,0 +1,20 @@
1
# A sentence of a passage: offset, optional text, infons, and the annotations
# and relations found in it.
class Sentence
  attr_accessor :offset, :text, :infons, :annotations, :relations
  attr_reader :passage

  # parent is stored as the owning passage.
  def initialize(parent)
    @infons = {}
    @annotations = []
    @relations = []
    @passage = parent
  end

  # First relation or annotation (relations are checked first) whose id
  # equals +id+, or nil when none matches.
  def find_node(id)
    candidates = relations + annotations
    candidates.each do |candidate|
      return candidate if candidate.id == id
    end
    nil
  end

  # Yields each relation of this sentence.
  def each_relation
    relations.each do |relation|
      yield relation
    end
  end
end
@@ -0,0 +1,3 @@
1
# Holds the gem version. Note the spelling: this module is SimpleBioc (lower
# case "c"), which is the name the gemspec reads VERSION from.
module SimpleBioc
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,14 @@
1
+ require "simple_bioc/version"
2
+ require "simple_bioc/bioc_reader"
3
+ require "simple_bioc/bioc_writer"
4
+
5
# Public entry points of the library: read a BioC XML file into a collection
# and turn a collection back into XML.
module SimpleBioC
  # Returns whatever BioCReader.read produces for the file at +file_path+.
  def from_xml(file_path)
    BioCReader.read(file_path)
  end

  # Returns whatever BioCWriter.write produces for +collection+.
  def to_xml(collection)
    BioCWriter.write(collection)
  end

  module_function :from_xml, :to_xml
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'simple_bioc/version'
5
+
6
# Gem metadata, packaged file list and dependencies for simple_bioc.
Gem::Specification.new do |spec|
  spec.name          = "simple_bioc"
  spec.version       = SimpleBioc::VERSION
  spec.authors       = ["Dongseop Kwon"]
  spec.email         = ["dongseop@gmail.com"]
  spec.description   = "Simple BioC parser/builder for ruby. BioC is 'A Minimalist Approach to Interoperability for Biomedical Text Processing' (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html)"
  spec.summary       = "Simple BioC parser/builder for ruby"
  spec.homepage      = "https://github.com/dongseop/simple_bioc"
  spec.license       = "MIT"

  # -z makes git print NUL-terminated, unquoted paths, so file names that git
  # would otherwise quote or escape are listed verbatim.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency("nokogiri", [">= 1.3.2"])

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency("rspec-core", ["~> 2.2"])
  spec.add_development_dependency("test-xml", ["~> 0.1.6"])
end
@@ -0,0 +1,14 @@
1
+ # Round-trip spec: reads each BioC XML fixture and writes it back out.
2
+ require 'simple_bioc'
3
+ require 'test_xml/spec'
4
# Round-trip check: every XML fixture is parsed with SimpleBioC.from_xml,
# written back with SimpleBioC.to_xml, and compared with the original file.
describe BioCReader do
  it "round-trips every XML fixture through the reader and writer" do
    # The glob is relative to the current working directory.
    fixtures = Dir["./xml/*.xml"]
    # Without this guard the example would pass without checking anything
    # whenever the glob matches no files.
    fail "no BioC XML fixtures found under ./xml" if fixtures.empty?

    fixtures.each do |file_path|
      collection = SimpleBioC.from_xml(file_path)
      output = SimpleBioC.to_xml(collection)
      expected = File.read(file_path)
      expect(output).to equal_xml(expected)
    end
  end
end