simple_bioc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ac827766c1ee157f8eb3a836754da23968863299
4
+ data.tar.gz: 89bd14e8bba58e50e45d68221d6cf7b915512c96
5
+ SHA512:
6
+ metadata.gz: bb1560756684d4f65393effcb32237304480718f24a14f871cce67f6cd302f5ee29911b5fcca26cd26f9b61873822cfdbb5405b8b095c0e5dcc0ab560f29cde9
7
+ data.tar.gz: 5e54ab65f41d74e85f48647f70d241edc2ede844cd2c32c21ebc3096a49cb8ed914304f8c7de004406476728d6d388d21bf6b44efbc454079a361c8deaab94fc
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ Gemfile.lock
6
+ InstalledFiles
7
+ coverage
8
+ lib/bundler/man
9
+ pkg
10
+ rdoc
11
+ spec/reports
12
+ test/tmp
13
+ test/version_tmp
14
+ tmp
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in simple_bioc.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Dongseop Kwon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Dongseop Kwon
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # SimpleBioc
2
+
3
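+ Simple BioC parser/builder for Ruby. BioC is 'A Minimalist Approach to Interoperability for Biomedical Text Processing' (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html).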
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'simple_bioc'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install simple_bioc
18
+
19
+ ## Usage
20
+
21
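+ Require the library with `require 'simple_bioc'`, call `SimpleBioC.from_xml(file_path)` to parse a BioC XML file into a collection, and call `SimpleBioC.to_xml(collection)` to serialize a collection back to an XML string.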
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+ require 'rdoc/task'
4
+
5
# Plain `rake` runs the :spec task.
task default: [:spec]

# Register the RSpec rake task with verbose output switched on.
RSpec::Core::RakeTask.new { |spec_task| spec_task.verbose = true }
@@ -0,0 +1,11 @@
1
+ require 'simple_bioc/node_base'
2
+
3
# A NodeBase that additionally exposes a text value and a list of locations.
class Annotation < NodeBase
  attr_accessor :text
  attr_accessor :locations

  # parent - handed unchanged to NodeBase#initialize.
  # The locations list starts out empty.
  def initialize(parent)
    super
    @locations = []
  end
end
@@ -0,0 +1,102 @@
1
+ require 'nokogiri'
2
+ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
3
+
4
# Reads BioC XML and fills in the Collection / Document / Passage / Sentence /
# Annotation / Relation / Location / Node object tree.
# Every method is a module function (callable as BioCReader.xxx).
module BioCReader
  module_function

  # Parses the XML file at +path+ and returns the populated Collection.
  #
  # Raises RuntimeError ('Wrong format') when the document has no
  # <collection> element.
  def read(path)
    collection = nil
    File.open(path) do |file|
      # Entity substitution (`noent`) is intentionally NOT enabled: Nokogiri
      # documents that option as unsafe for untrusted documents because it
      # expands external entities (XXE). Only `strict` and `noblanks` are set.
      xml_doc = Nokogiri::XML(file) { |config| config.strict.noblanks }
      xml = xml_doc.at_xpath("//collection")
      fail 'Wrong format' if xml.nil?

      collection = Collection.new
      read_collection(xml, collection)
    end

    collection
  end

  # Returns the content of the first node matching +name+ under +xml+,
  # or nil when there is no such node.
  def read_text(xml, name)
    node = xml.at_xpath(name)
    node.content if node
  end

  # Like read_text, but converts the content with String#to_i.
  # Returns nil when the node is missing.
  def read_int(xml, name)
    text = read_text(xml, name)
    text.to_i if text
  end

  # Copies every <infon> child of +xml+ into obj.infons, keyed by the
  # element's "key" attribute.
  def read_infon(xml, obj)
    xml.xpath("infon").each { |infon| obj.infons[infon["key"]] = infon.content }
  end

  # For each child element called +name+: instantiates the class of the same
  # (capitalized) name with +obj+ as parent, fills it through the matching
  # read_<name> method and appends it to obj's @<name>s list.
  def read_recursive(xml, obj, name)
    target_class = Object.const_get(name.capitalize)
    children = obj.instance_variable_get(:"@#{name}s")
    reader = :"read_#{name}"
    xml.xpath(name).each do |node|
      instance = target_class.new(obj)
      send(reader, node, instance)
      children << instance
    end
  end

  # Fills +collection+ from a <collection> element.
  def read_collection(xml, collection)
    collection.source = read_text(xml, "source")
    collection.date = read_text(xml, "date")
    collection.key = read_text(xml, "key")
    read_infon(xml, collection)
    read_recursive(xml, collection, "document")
  end

  # Fills +document+ from a <document> element, then lets the document
  # resolve its relation references (Document#adjust_ref) once all passages
  # and relations are loaded.
  def read_document(xml, document)
    document.id = read_text(xml, "id")
    read_infon(xml, document)
    read_recursive(xml, document, "passage")
    read_recursive(xml, document, "relation")
    document.adjust_ref
  end

  # Fills +passage+ from a <passage> element.
  def read_passage(xml, passage)
    passage.text = read_text(xml, "text")
    passage.offset = read_int(xml, "offset")
    read_infon(xml, passage)
    read_recursive(xml, passage, "sentence")
    read_recursive(xml, passage, "annotation")
    read_recursive(xml, passage, "relation")
  end

  # Fills +sentence+ from a <sentence> element.
  def read_sentence(xml, sentence)
    sentence.text = read_text(xml, "text")
    sentence.offset = read_int(xml, "offset")
    read_infon(xml, sentence)
    read_recursive(xml, sentence, "annotation")
    read_recursive(xml, sentence, "relation")
  end

  # Fills +annotation+ from an <annotation> element; the id comes from the
  # element's "id" attribute.
  def read_annotation(xml, annotation)
    annotation.id = xml["id"]
    annotation.text = read_text(xml, "text")
    read_infon(xml, annotation)
    read_recursive(xml, annotation, "location")
  end

  # Fills +relation+ from a <relation> element; the id comes from the
  # element's "id" attribute.
  def read_relation(xml, relation)
    relation.id = xml["id"]
    read_infon(xml, relation)
    read_recursive(xml, relation, "node")
  end

  # Copies the "offset" and "length" attributes verbatim (as the attribute
  # values returned by +xml+, i.e. not converted to integers).
  def read_location(xml, location)
    location.offset = xml["offset"]
    location.length = xml["length"]
  end

  # Copies the "refid" and "role" attributes onto +node+.
  def read_node(xml, node)
    node.refid = xml["refid"]
    node.role = xml["role"]
  end
end
@@ -0,0 +1,93 @@
1
+ require 'nokogiri'
2
+ Dir[File.dirname(__FILE__) + '/*.rb'].each {|file| require file }
3
+
4
# Serializes the BioC object tree to XML through a Nokogiri builder.
# Every method is a module function (callable as BioCWriter.xxx).
module BioCWriter
  module_function

  # Returns the XML string (UTF-8 encoding declared) for +collection+.
  def write(collection)
    document_builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
      write_collection(xml, collection)
    end
    document_builder.to_xml
  end

  # Emits one <infon key="..."> element per entry of obj.infons, with the
  # entry's value as text.
  def write_infon(xml, obj)
    obj.infons.each_pair do |key, value|
      xml.infon(key: key) { xml.text value }
    end
  end

  # Emits <collection>: source, date, key, infons, then every document.
  def write_collection(xml, collection)
    xml.collection do
      xml.source(collection.source)
      xml.date(collection.date)
      xml.key(collection.key)
      write_infon(xml, collection)
      collection.documents.each { |doc| write_document(xml, doc) }
    end
  end

  # Emits <document>: id, infons, passages, then relations.
  def write_document(xml, document)
    xml.document do
      xml.id_(document.id)
      write_infon(xml, document)
      document.passages.each { |passage| write_passage(xml, passage) }
      document.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # Emits <passage>: infons, offset, text (skipped when nil), sentences,
  # annotations, then relations.
  def write_passage(xml, passage)
    xml.passage do
      write_infon(xml, passage)
      xml.offset(passage.offset)
      xml.text_(passage.text) unless passage.text.nil?
      passage.sentences.each { |sentence| write_sentence(xml, sentence) }
      passage.annotations.each { |annotation| write_annotation(xml, annotation) }
      passage.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # Emits <sentence>: infons, offset, text (skipped when nil), annotations,
  # then relations.
  def write_sentence(xml, sentence)
    xml.sentence do
      write_infon(xml, sentence)
      xml.offset(sentence.offset)
      xml.text_(sentence.text) unless sentence.text.nil?
      sentence.annotations.each { |annotation| write_annotation(xml, annotation) }
      sentence.relations.each { |relation| write_relation(xml, relation) }
    end
  end

  # Emits <annotation>; the id attribute is passed only when the annotation
  # has an id (nil is handed to the builder otherwise).
  def write_annotation(xml, annotation)
    attribute = annotation.id.nil? ? nil : { id: annotation.id }
    xml.annotation(attribute) do
      write_infon(xml, annotation)
      xml.text_(annotation.text)
      annotation.locations.each { |location| write_location(xml, location) }
    end
  end

  # Emits <relation>; the id attribute is passed only when the relation has
  # an id (nil is handed to the builder otherwise).
  def write_relation(xml, relation)
    attribute = relation.id.nil? ? nil : { id: relation.id }
    xml.relation(attribute) do
      write_infon(xml, relation)
      relation.nodes.each { |node| write_node(xml, node) }
    end
  end

  # Emits a <location> element carrying offset and length attributes.
  def write_location(xml, location)
    xml.location(offset: location.offset, length: location.length)
  end

  # Emits a node element (via the builder's `node_` call) carrying refid and
  # role attributes.
  def write_node(xml, node)
    xml.node_(refid: node.refid, role: node.role)
  end
end
@@ -0,0 +1,15 @@
1
# Root of the BioC object tree: holds the documents plus the collection-level
# source, date, key and infons.
class Collection
  attr_accessor :documents, :infons, :source, :date, :key

  # Starts with no documents, no infons and empty source/date/key strings.
  def initialize
    @documents = []
    @infons = {}
    @source = ""
    @date = ""
    @key = ""
  end

  # Serializes this collection to XML by delegating to BioCWriter.write,
  # the same call SimpleBioC.to_xml makes. Returns whatever BioCWriter.write
  # returns. BioCWriter must already be loaded when this is called.
  def to_xml
    BioCWriter.write(self)
  end
end
@@ -0,0 +1,29 @@
1
# One document of a collection: an id, infons, passages and document-level
# relations, plus a back-reference to the parent collection.
class Document
  attr_accessor :id, :infons, :passages, :relations
  attr_reader :collection

  # parent - stored as the owning collection.
  def initialize(parent)
    @infons = {}
    @passages = []
    @relations = []
    @collection = parent
  end

  # Returns the first object whose id equals +id+: document-level relations
  # are checked first, then each passage is asked in order (Passage#find_node).
  # Returns nil when nothing matches.
  def find_node(id)
    own = relations.find { |relation| relation.id == id }
    return own unless own.nil?

    passages.each do |passage|
      found = passage.find_node(id)
      return found unless found.nil?
    end
    nil
  end

  # Calls adjust_ref on every relation reachable from this document.
  def adjust_ref
    each_relation { |relation| relation.adjust_ref }
  end

  # Yields the document-level relations, then every relation yielded by each
  # passage's each_relation. Without a block, returns an Enumerator instead
  # of raising LocalJumpError.
  def each_relation(&block)
    return enum_for(:each_relation) unless block_given?

    relations.each(&block)
    passages.each { |passage| passage.each_relation(&block) }
  end
end
@@ -0,0 +1,10 @@
1
# An offset/length pair belonging to an annotation.
class Location
  attr_accessor :offset, :length
  attr_reader :annotation

  # parent - stored as the owning annotation.
  #
  # Only @annotation is set here; offset and length stay nil until assigned.
  # The class exposes no infons or locations accessor, so no such state is
  # created.
  def initialize(parent)
    @annotation = parent
  end
end
@@ -0,0 +1,12 @@
1
# A member of a relation: a refid/role pair, plus the object the refid
# resolves to once adjust_ref has run.
class Node
  attr_accessor :refid, :role
  attr_reader :relation, :ref

  # parent - stored as the owning relation.
  def initialize(parent)
    @relation = parent
  end

  # Looks refid up through the owning relation's document (find_node) and
  # stores the result in @ref. Returns the stored value.
  def adjust_ref
    owning_document = relation.document
    @ref = owning_document.find_node(refid)
  end
end
@@ -0,0 +1,14 @@
1
# Shared base for objects that carry an id and infons and that can hang off
# a Document, a Passage or a Sentence.
class NodeBase
  attr_accessor :id, :infons
  attr_reader :document, :passage, :sentence

  # parent - the Document, Passage or Sentence that owns this object.
  #
  # The parent is stored in the matching reader; the enclosing passage and
  # document are then derived from it (sentence -> passage -> document).
  # Any other kind of parent leaves all three readers nil.
  def initialize(parent)
    @infons = {}

    case parent
    when Document then @document = parent
    when Passage then @passage = parent
    when Sentence then @sentence = parent
    end

    @passage = @sentence.passage if @sentence
    @document = @passage.document if @passage
  end
end
@@ -0,0 +1,29 @@
1
# One passage of a document: offset, text, infons, and its sentences,
# annotations and relations, plus a back-reference to the parent document.
class Passage
  attr_accessor :offset, :text, :infons, :sentences, :annotations, :relations
  attr_reader :document

  # parent - stored as the owning document.
  def initialize(parent)
    @infons = {}
    @sentences = []
    @annotations = []
    @relations = []
    @document = parent
  end

  # Returns "<offset>:<text>".
  def to_s
    "#{offset}:#{text}"
  end

  # Returns the first object whose id equals +id+: this passage's relations,
  # then its annotations, then each sentence in order (Sentence#find_node).
  # Returns nil when nothing matches.
  def find_node(id)
    own = (relations + annotations).find { |node| node.id == id }
    return own unless own.nil?

    sentences.each do |sentence|
      found = sentence.find_node(id)
      return found unless found.nil?
    end
    nil
  end

  # Yields this passage's relations, then every relation yielded by each
  # sentence's each_relation. Without a block, returns an Enumerator instead
  # of raising LocalJumpError.
  def each_relation(&block)
    return enum_for(:each_relation) unless block_given?

    relations.each(&block)
    sentences.each { |sentence| sentence.each_relation(&block) }
  end
end
@@ -0,0 +1,14 @@
1
+ require 'simple_bioc/node_base'
2
+
3
# A NodeBase that groups a list of nodes.
class Relation < NodeBase
  attr_accessor :nodes

  # parent - handed unchanged to NodeBase#initialize.
  # The node list starts out empty.
  def initialize(parent)
    super
    @nodes = []
  end

  # Asks every node of this relation to resolve its reference.
  def adjust_ref
    nodes.each(&:adjust_ref)
  end
end
@@ -0,0 +1,20 @@
1
# One sentence of a passage: offset, text, infons, and its annotations and
# relations, plus a back-reference to the parent passage.
class Sentence
  attr_accessor :offset, :text, :infons, :annotations, :relations
  attr_reader :passage

  # parent - stored as the owning passage.
  def initialize(parent)
    @infons = {}
    @annotations = []
    @relations = []
    @passage = parent
  end

  # Returns the first relation or annotation (relations are checked first)
  # whose id equals +id+, or nil when none matches.
  def find_node(id)
    (relations + annotations).find { |node| node.id == id }
  end

  # Yields each relation of this sentence. Without a block, returns an
  # Enumerator instead of raising LocalJumpError.
  def each_relation(&block)
    return enum_for(:each_relation) unless block_given?

    relations.each(&block)
  end
end
@@ -0,0 +1,3 @@
1
# Holds the gem version string.
#
# NOTE(review): the library entry point is declared as `SimpleBioC`
# (capital C) while this module is spelled `SimpleBioc`; these are two
# distinct constants — confirm whether that is intended.
module SimpleBioc
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,14 @@
1
+ require "simple_bioc/version"
2
+ require "simple_bioc/bioc_reader"
3
+ require "simple_bioc/bioc_writer"
4
+
5
# Public entry points of the library: thin wrappers around BioCReader and
# BioCWriter.
module SimpleBioC
  # Returns the result of BioCReader.read for the file at +file_path+.
  def from_xml(file_path)
    BioCReader.read(file_path)
  end

  # Returns the result of BioCWriter.write for +collection+.
  def to_xml(collection)
    BioCWriter.write(collection)
  end

  module_function :from_xml, :to_xml
end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'simple_bioc/version'
5
+
6
# Gem metadata, packaged file list and dependencies for simple_bioc.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "simple_bioc"
  gem.version     = SimpleBioc::VERSION
  gem.authors     = ["Dongseop Kwon"]
  gem.email       = ["dongseop@gmail.com"]
  gem.description = "Simple BioC parser/builder for ruby. BioC is a 'A Minimalist Approach to Interoperability for Biomedical Text Processing' (http://www.ncbi.nlm.nih.gov/CBBresearch/Dogan/BioC/BioCHome.html)"
  gem.summary     = "Simple BioC parser/builder for ruby"
  gem.homepage    = "https://github.com/dongseop/simple_bioc"
  gem.license     = "MIT"

  # Package whatever git tracks; executables and test files are picked out
  # of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency.
  gem.add_dependency "nokogiri", ">= 1.3.2"

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec-core", "~> 2.2"
  gem.add_development_dependency "test-xml", "~> 0.1.6"
end
@@ -0,0 +1,14 @@
1
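+ # Round-trip spec: every XML file read by SimpleBioC.from_xml must serialize back to equivalent XML via SimpleBioC.to_xml.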
2
+ require 'simple_bioc'
3
+ require 'test_xml/spec'
4
describe BioCReader do
  # Round trip: each ./xml/*.xml file (relative to the working directory) is
  # parsed, written back out, and compared with the original file contents.
  it "should be load successfully" do
    Dir.glob("./xml/*.xml").each do |xml_path|
      puts xml_path
      parsed = SimpleBioC.from_xml(xml_path)
      expect(SimpleBioC.to_xml(parsed)).to equal_xml(File.read(xml_path))
    end
  end
end