biointerchange 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +12 -0
  4. data/Gemfile +17 -0
  5. data/LICENSE.txt +8 -0
  6. data/README.md +166 -0
  7. data/Rakefile +50 -0
  8. data/VERSION +1 -0
  9. data/bin/biointerchange +6 -0
  10. data/docs/exceptions_readme.txt +13 -0
  11. data/examples/BovineGenomeChrX.gff3.gz +0 -0
  12. data/examples/gb-2007-8-3-R40.xml +243 -0
  13. data/examples/pubannotation.json +1 -0
  14. data/generators/rdfxml.rb +104 -0
  15. data/lib/biointerchange/core.rb +195 -0
  16. data/lib/biointerchange/exceptions.rb +38 -0
  17. data/lib/biointerchange/genomics/gff3_feature.rb +82 -0
  18. data/lib/biointerchange/genomics/gff3_feature_set.rb +37 -0
  19. data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +107 -0
  20. data/lib/biointerchange/genomics/gff3_reader.rb +86 -0
  21. data/lib/biointerchange/gff3.rb +135 -0
  22. data/lib/biointerchange/reader.rb +25 -0
  23. data/lib/biointerchange/registry.rb +29 -0
  24. data/lib/biointerchange/sio.rb +7124 -0
  25. data/lib/biointerchange/sofa.rb +1566 -0
  26. data/lib/biointerchange/textmining/content.rb +69 -0
  27. data/lib/biointerchange/textmining/document.rb +36 -0
  28. data/lib/biointerchange/textmining/pdfx_xml_reader.rb +161 -0
  29. data/lib/biointerchange/textmining/process.rb +57 -0
  30. data/lib/biointerchange/textmining/pubannos_json_reader.rb +72 -0
  31. data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +197 -0
  32. data/lib/biointerchange/textmining/text_mining_reader.rb +41 -0
  33. data/lib/biointerchange/writer.rb +23 -0
  34. data/lib/biointerchange.rb +3 -0
  35. data/spec/exceptions_spec.rb +27 -0
  36. data/spec/gff3_rdfwriter_spec.rb +67 -0
  37. data/spec/text_mining_pdfx_xml_reader_spec.rb +89 -0
  38. data/spec/text_mining_pubannos_json_reader_spec.rb +71 -0
  39. data/spec/text_mining_rdfwriter_spec.rb +57 -0
  40. data/web/about.html +89 -0
  41. data/web/biointerchange.js +133 -0
  42. data/web/bootstrap/css/bootstrap-responsive.css +1040 -0
  43. data/web/bootstrap/css/bootstrap-responsive.min.css +9 -0
  44. data/web/bootstrap/css/bootstrap.css +5624 -0
  45. data/web/bootstrap/css/bootstrap.min.css +9 -0
  46. data/web/bootstrap/img/glyphicons-halflings-white.png +0 -0
  47. data/web/bootstrap/img/glyphicons-halflings.png +0 -0
  48. data/web/bootstrap/js/bootstrap.js +2027 -0
  49. data/web/bootstrap/js/bootstrap.min.js +6 -0
  50. data/web/bootstrap/js/jquery-1.8.1.min.js +2 -0
  51. data/web/css/rdoc-style.css +5786 -0
  52. data/web/css/rdoc.css +716 -0
  53. data/web/images/BioInterchange300.png +0 -0
  54. data/web/index.html +109 -0
  55. data/web/service/rdfizer.fcgi +68 -0
  56. data/web/webservices.html +123 -0
  57. metadata +240 -0
@@ -0,0 +1,69 @@
1
+ module BioInterchange::TextMining
2
+
3
# Describes a span of text within a document by its absolute offset and length,
# together with an optional classification and the process that produced it.
class Content

  # Constants that describe content types.
  UNSPECIFIED = 0
  DOCUMENT = 1
  PAGE = 2
  TITLE = 3
  AUTHOR = 4
  ABSTRACT = 5
  SECTION = 6
  PARAGRAPH = 7
  SENTENCE = 8
  PHRASE = 9
  WORD = 10
  CHARACTER = 11

  # Offset of the content as absolute position within the document.
  attr_reader :offset

  # Length of the content, which is measured in characters.
  attr_reader :length

  # Type of the content, if known, or +BioInterchange::TextMining::Content::UNSPECIFIED+ otherwise.
  attr_reader :type

  # Process associated with this content, if previously provided, or nil otherwise.
  attr_reader :process

  # Creates a new document content representation at a specific location of the document.
  #
  # +offset+:: zero-based offset of the represented content within the document (absolute location within the document)
  # +length+:: length of the represented content, where a length of zero denotes a boundary between two characters
  # +type+:: classification of the content
  # +process+:: object describing how the content was produced (its +uri+ is used by #uri), or nil if unknown
  def initialize(offset, length, type = UNSPECIFIED, process = nil)
    @offset = offset
    @length = length
    @type = type
    @process = process
    # Explicitly start without a context, so that #uri can test for it
    # without reading an uninitialized instance variable.
    @context = nil
  end

  # Sets the context of this content.
  #
  # +context+:: a +BioInterchange::TextMining::Document+ or +BioInterchange::TextMining::Content+ instance in which this content is enclosed in
  def setContext(context)
    @context = context
  end

  # Returns a URI that identifies this content. The URI is composed of the
  # context's URI (without its scheme), the offset, length and type of the
  # content, and the process URI in parentheses (or '-' if there is no process).
  #
  # Raises +BioInterchange::Exceptions::ImplementationModelError+ if no context has been set.
  def uri
    raise BioInterchange::Exceptions::ImplementationModelError, 'An URI can only be returned for content with a context (i.e., use setContext(context) first).' unless @context
    process = @process ? "(#{strip_scheme(@process.uri)})" : '-'
    "biointerchange://textmining/content/#{strip_scheme(@context.uri)}/#{@offset},#{@length},#{@type},#{process}"
  end

  private

  # Removes a leading "scheme://" part from the given URI string.
  def strip_scheme(uri)
    uri.sub(/^.*?:\/\//, '')
  end

end
67
+
68
+ end
69
+
@@ -0,0 +1,36 @@
1
+ module BioInterchange::TextMining
2
+
3
# Represents a document as a source URI plus an ordered list of content spans.
class Document

  # URI of the document.
  attr_reader :uri

  # Creates a blank document representation.
  #
  # +uri+:: source of the document
  def initialize(uri)
    @uri = uri

    # Will hold content:
    @content = []
  end

  # Adds content to the document and returns the document itself, so that the
  # internal (mutable) content list is never handed out to callers.
  #
  # +content+:: content of type +BioInterchange::TextMining::Content+ that should be added to the document
  #
  # Raises +BioInterchange::Exceptions::ImplementationModelError+ if +content+ is of a different kind.
  def add(content)
    raise BioInterchange::Exceptions::ImplementationModelError, 'Content has to be of kind BioInterchange::TextMining::Content' unless content.kind_of?(BioInterchange::TextMining::Content)
    @content << content
    self
  end

  # Returns the document contents as a frozen copy of the internal array.
  def contents
    @content.clone.freeze
  end

end
34
+
35
+ end
36
+
@@ -0,0 +1,161 @@
1
+ module BioInterchange::TextMining
2
+
3
+ require 'rexml/document'
4
+ require 'rexml/streamlistener'
5
+
6
# Reader for PDFX XML output that builds a +BioInterchange::TextMining::Document+
# from the character offsets of the <article-title>, <abstract>, <body>,
# <article> and <section> elements.
class PdfxXmlReader < BioInterchange::TextMining::TMReader

  # Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
  #
  # Presently I assume a single document per xml file. Sections may nest.
  # I also assume that a Content::DOCUMENT type is everything between
  # the <article> tags.
  #
  # +inputstream+:: Input IO stream (or String holding the XML) to deserialize
  #
  # Raises +BioInterchange::Exceptions::ImplementationReaderError+ if the input is neither an IO nor a String.
  def deserialize(inputstream)
    raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO or String, cannot read.' unless inputstream.kind_of?(IO) || inputstream.kind_of?(String)

    @input = inputstream

    pdfx
  end

  private

  # Stream-parses the stored input and returns the document built by the listener.
  def pdfx
    # Hand the reader's process to the listener, so that the created content
    # is associated with it (the listener has no access to this instance's state).
    listener = MyListener.new(@process)
    REXML::Document.parse_stream(@input, listener)
    listener.document
  end

  # REXML stream listener that tracks character offsets/lengths of the
  # supported elements and turns them into document content on tag end.
  class MyListener

    include REXML::StreamListener

    # +process+:: process that is attached to every content created by this listener (may be nil)
    def initialize(process = nil)
      @process = process
      @doc = nil
      @map = {}

      #sections can nest, so "stack" them
      @map['sec_s'] = []
      @map['sec_l'] = []
    end

    # Called on an opening tag: marks the element as open and records its
    # start offset (the number of article characters seen so far).
    def tag_start(name, attr)
      case name
      when 'job'
        raise BioInterchange::Exceptions::InputFormatError, 'Input document XML has multiple <job> tags, cannot parse multiple documents within a single file.' if @map['id_done']
        @map['id'] = true
      when 'article-title'
        raise BioInterchange::Exceptions::InputFormatError, 'Input document XML has multiple <article-title> tags defined, cannot parse multiple documents within a single file.' if @map['title_done']
        @map['title'] = true
        @map['title_s'] = @map['art_l']
        @map['title_l'] = 0
      when 'abstract'
        raise BioInterchange::Exceptions::InputFormatError, 'Input document XML has multiple <abstract> tags defined, cannot parse multiple documents within a single file.' if @map['abs_done']
        @map['abs'] = true
        @map['abs_s'] = @map['art_l']
        @map['abs_l'] = 0
      when 'body'
        raise BioInterchange::Exceptions::InputFormatError, 'Input document XML has multiple <body> tags defined, cannot parse multiple documents within a single file.' if @map['body_done']
        @map['body'] = true
        @map['body_s'] = @map['art_l']
        @map['body_l'] = 0
      when 'article'
        raise BioInterchange::Exceptions::InputFormatError, 'Input document XML has multiple <article> tags defined, cannot parse multiple documents within a single file.' if @map['art_done']
        @map['art'] = true
        @map['art_s'] = 0
        @map['art_l'] = 0
      when 'section'
        check_section_stacks('Error with section stack, stacks not equal in size: Possibly not a well formed XML input file. Check <section> tags all match up and do not overlap (nesting is fine).')
        @map['sec_s'].push @map['art_l']
        @map['sec_l'].push 0
      end
    end

    # Called for character data: adds the text length to every element that
    # is currently open. Text inside <job> is used as the document identifier.
    def text(data)
      @map['art_l'] += data.length if @map['art']

      if @map['id']
        @doc = BioInterchange::TextMining::Document.new("http://pdfx.cs.man.ac.uk/" + data)
      elsif @map['title']
        @map['title_l'] += data.length
      elsif @map['abs']
        @map['abs_l'] += data.length
      end

      @map['body_l'] += data.length if @map['body']

      #add length to *all* current sections
      @map['sec_l'].map! { |length| length + data.length }
    end

    #TODO add deal with <author> type tags

    # Called on a closing tag: closes the element and adds the content that
    # it spans to the document.
    def tag_end(name)
      case name
      when 'job'
        @map['id'] = false
        @map['id_done'] = true
      when 'article-title'
        @map['title'] = false
        add_content(@map['title_s'], @map['title_l'], BioInterchange::TextMining::Content::TITLE)
        @map['title_done'] = true
      when 'abstract'
        @map['abs'] = false
        add_content(@map['abs_s'], @map['abs_l'], BioInterchange::TextMining::Content::ABSTRACT)
        @map['abs_done'] = true
      when 'body'
        @map['body'] = false
        add_content(@map['body_s'], @map['body_l'], BioInterchange::TextMining::Content::SECTION)
        @map['body_done'] = true
      when 'article'
        @map['art'] = false
        add_content(@map['art_s'], @map['art_l'], BioInterchange::TextMining::Content::DOCUMENT)
        @map['art_done'] = true
      when 'section'
        check_section_stacks('Error with section stack, stacks not equal in size')
        add_content(@map['sec_s'].pop, @map['sec_l'].pop, BioInterchange::TextMining::Content::SECTION)
      end
    end

    # Returns the document that was built while parsing (nil if no <job> text was seen).
    def document
      @doc
    end

    private

    # Raises an InputFormatError with the given message if the section start
    # and section length stacks got out of sync.
    def check_section_stacks(message)
      raise BioInterchange::Exceptions::InputFormatError, message unless @map['sec_s'].size == @map['sec_l'].size
    end

    # Creates content of the given type and adds it to the document.
    def add_content(offset, length, type)
      # Content needs a document as context; the document only exists once the <job> text was read.
      raise BioInterchange::Exceptions::InputFormatError, 'Input document XML contains content before a document identifier (<job> tag) was read.' unless @doc
      content = BioInterchange::TextMining::Content.new(offset, length, type, @process)
      content.setContext(@doc)
      @doc.add(content)
    end

  end

end
160
+
161
+ end
@@ -0,0 +1,57 @@
1
+ module BioInterchange::TextMining
2
+
3
# Describes the software, web-service or human curator that produced some content.
class Process

  # Constants that describe process types.
  UNSPECIFIED = 0
  SOFTWARE = 1
  WEBSERVICE = 2
  MANUAL = 3

  # Dictionary of viable metadata keys.
  VERSION = 0

  # Name of the process.
  attr_reader :name

  # URI that has further details about the process, which can also be the form of an email address
  # in cases where the process describes human driven annotation.
  attr_reader :uri

  # Date-time when this process was carried out, or nil otherwise if the information is not available.
  attr_reader :date

  # Type of this process.
  attr_reader :type

  # Creates a new process representation.
  #
  # +name+:: identification of the software/service/curator involved in the process, e.g. "ABNER" or "Peter Smith"
  # +uri+:: details about the processes origin, e.g. "http://pages.cs.wisc.edu/~bsettles/abner/" or "peter.smith@example.org"
  # +type+:: classification of the described process
  # +metadata+:: a hash that holds additional information about the process via dictionary defined keywords, e.g. { Process::VERSION => '0.0.1alpha' }
  # +date+:: date-time when the process was carried out, which may be nil if the date-time is unknown
  def initialize(name, uri, type = UNSPECIFIED, metadata = {}, date = nil)
    @name = name
    @uri = uri
    @date = date
    @type = type
    @metadata = metadata
  end

  # Returns additional meta-data associated with this process as a frozen copy of the stored hash.
  def metadata
    @metadata.clone.freeze
  end

end
56
+
57
+ end
@@ -0,0 +1,72 @@
1
+ module BioInterchange::TextMining
2
+
3
+ require 'rubygems'
4
+ require 'json'
5
+
6
# Reader for the *Pubannotations* JSON format.
class PubannosJsonReader < BioInterchange::TextMining::TMReader

  # Reads the input and returns the associated +BioInterchange::TextMining::Document+ model.
  #
  # +inputstream+:: an IO instance that is read completely, or a String that holds the JSON data
  #
  # Any other kind of input is passed on to the superclass implementation,
  # which raises an exception for non-IO input.
  def deserialize(inputstream)
    if inputstream.kind_of?(IO)
      pubannos(inputstream.read)
    elsif inputstream.kind_of?(String)
      pubannos(inputstream)
    else
      #else raise exception
      super(inputstream)
    end
  end

  private

  # Specific method for parsing of *Pubannotations* json format
  #
  # +data+:: String that holds the JSON document
  #
  # Raises +BioInterchange::Exceptions::InputFormatError+ if the JSON has an 'Error' key.
  def pubannos(data)
    result = JSON.parse(data)

    if result.has_key? 'Error'
      # Double quotes are required here, so that the error text is interpolated into the message.
      raise BioInterchange::Exceptions::InputFormatError, "Error parsing the JSON input file: #{result['Error']}"
    end

    text = result['text']
    #doc_uri = "http://pubannotation.dbcls.jp/pmdocs/" + result['pmid'].to_s
    doc_uri = result['docurl']

    # The complete text is represented as content of type document.
    doc = Document.new(doc_uri)
    doc_content = Content.new(0, text.length, Content::DOCUMENT, @process)
    doc_content.setContext(doc)
    doc.add(doc_content)

    #so our document requires content of type document or abstract
    #should it hold the content string?

    (result['catanns'] || []).each do |annot|
      start_offset = annot['begin']
      length = annot['end'] - start_offset

      #phrase = type for NE
      con = Content.new(start_offset, length, Content::PHRASE, @process)
      con.setContext(doc)
      doc.add(con)

      #set process.date = updated_time?
    end

    doc
  end

end
71
+
72
+ end
@@ -0,0 +1,197 @@
1
+ require 'rdf'
2
+ require 'rdf/ntriples'
3
+ require 'date'
4
+
5
+ module BioInterchange::TextMining
6
+
7
# Serializes +BioInterchange::TextMining::Document+ models as RDF N-Triples.
class RDFWriter < BioInterchange::Writer

  # Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
  #
  # +ostream+:: instance of an IO class or derivative that is used for RDF serialization
  #
  # Raises +BioInterchange::Exceptions::ImplementationWriterError+ if +ostream+ is not an IO.
  def initialize(ostream)
    raise BioInterchange::Exceptions::ImplementationWriterError, 'The output stream is not an instance of IO or its subclasses.' unless ostream.kind_of?(IO)
    @ostream = ostream
  end

  # Serializes a model as RDF.
  #
  # +model+:: a generic representation of input data that is derived from BioInterchange::TextMining::Document
  #
  # Raises +BioInterchange::Exceptions::ImplementationWriterError+ for any other kind of model.
  def serialize(model)
    unless model.instance_of?(BioInterchange::TextMining::Document)
      raise BioInterchange::Exceptions::ImplementationWriterError, 'The provided model cannot be serialized at the moment. ' +
                                                                    'Supported classes are BioInterchange::TextMining::Document (and that\'s it for now).'
    end
    serialize_document(model)
  end

  private

  # Generates an URI for a given process and its contents.
  #
  # +process+:: process instance
  # +kind+:: kind of the URI that should be generated, for example, whether the URI should represent the name, date, etc.
  def process_uri(process, kind)
    segment = case kind
              when :process then 'self'
              when :name then 'name'
              when :uri then 'uri'
              when :date then 'date'
              else
                raise BioInterchange::Exceptions::ImplementationWriterError, "There is no implementation for serializing a process as #{kind}."
              end
    RDF::URI.new("biointerchange://textmining/process/#{segment}/#{process.uri.sub(/^.*?:\/\//, '')}")
  end

  # Generates an URI for a given content and its contents.
  #
  # +content+:: content instance
  # +kind+:: kind of the URI that should be generated; only +:start+ and +:stop+ are implemented
  def content_uri(content, kind)
    segment = case kind
              when :start then 'start'
              when :stop then 'stop'
              else
                raise BioInterchange::Exceptions::ImplementationWriterError, "There is no implementation for serializing a content as #{kind}."
              end
    RDF::URI.new("biointerchange://textmining/content/#{segment}/#{content.uri.sub(/^.*?:\/\//, '')}")
  end

  # Serializes RDF for a textual document representation using the Semanticscience Integrated Ontology
  # (http://code.google.com/p/semanticscience/wiki/SIO).
  #
  # +model+:: an instance of +BioInterchange::TextMining::Document+
  def serialize_document(model)
    graph = RDF::Graph.new
    document_uri = RDF::URI.new(model.uri)
    graph.insert(RDF::Statement.new(document_uri, RDF.type, BioInterchange::SIO.document))
    model.contents.each { |content| serialize_content(graph, document_uri, content) }
    RDF::NTriples::Writer.dump(graph, @ostream)
  end

  # Returns the SIO term that represents the given content type, where
  # unknown types are treated like +Content::UNSPECIFIED+.
  #
  # +type+:: one of the content type constants of +BioInterchange::TextMining::Content+
  def sio_class(type)
    case type
    when Content::DOCUMENT then BioInterchange::SIO.document
    when Content::PAGE, Content::SECTION then BioInterchange::SIO.document_section
    when Content::TITLE then BioInterchange::SIO.title
    when Content::AUTHOR then BioInterchange::SIO.author_section
    when Content::ABSTRACT then BioInterchange::SIO.abstract_section
    when Content::PARAGRAPH then BioInterchange::SIO.paragraph
    when Content::SENTENCE then BioInterchange::SIO.sentence
    when Content::PHRASE then BioInterchange::SIO.phrase
    when Content::WORD then BioInterchange::SIO.word
    when Content::CHARACTER then BioInterchange::SIO.character
    else BioInterchange::SIO.language_entity
    end
  end

  # Serializes a Content object for a given document URI.
  #
  # +graph+:: RDF graph to which content is added
  # +document_uri+:: the document URI to which the added content belongs to
  # +content+:: an instance that describes the content
  def serialize_content(graph, document_uri, content)
    content_uri = RDF::URI.new(content.uri)
    graph.insert(RDF::Statement.new(document_uri, BioInterchange::SIO.has_attribute, content_uri))
    serialize_process(graph, document_uri, content_uri, content.process) if content.process

    graph.insert(RDF::Statement.new(content_uri, RDF.type, sio_class(content.type)))

    graph.insert(RDF::Statement.new(content_uri, BioInterchange::SIO.has_attribute, serialize_content_start(graph, document_uri, content_uri, content)))
    graph.insert(RDF::Statement.new(content_uri, BioInterchange::SIO.has_attribute, serialize_content_stop(graph, document_uri, content_uri, content)))
  end

  # Serializes a process object and links the given content to it.
  #
  # +graph+:: RDF graph to which the process is added
  # +document_uri+:: the document URI to which the content belongs to
  # +content_uri+:: URI of the content that was created by the process
  # +process+:: an instance that describes the process
  def serialize_process(graph, document_uri, content_uri, process)
    process_uri = process_uri(process, :process)
    graph.insert(RDF::Statement.new(content_uri, BioInterchange::SIO.is_direct_part_of, process_uri))
    # Manual processes are described as a FOAF person, everything else as a SIO process:
    if process.type == BioInterchange::TextMining::Process::MANUAL
      graph.insert(RDF::Statement.new(process_uri, RDF.type, RDF::FOAF.Person))
      graph.insert(RDF::Statement.new(process_uri, RDF::FOAF.name, RDF::Literal.new(process.name)))
      # NOTE(review): foaf:name is used for the process URI too; possibly another FOAF property was intended -- confirm.
      graph.insert(RDF::Statement.new(process_uri, RDF::FOAF.name, RDF::URI.new(process.uri)))
    else
      graph.insert(RDF::Statement.new(process_uri, RDF.type, BioInterchange::SIO.process))
      graph.insert(RDF::Statement.new(process_uri, BioInterchange::SIO.has_attribute, serialize_process_name(graph, document_uri, content_uri, process_uri, process)))
      graph.insert(RDF::Statement.new(process_uri, BioInterchange::SIO.has_attribute, serialize_process_uri(graph, document_uri, content_uri, process_uri, process)))
      graph.insert(RDF::Statement.new(process_uri, BioInterchange::SIO.has_attribute, serialize_process_date(graph, document_uri, content_uri, process_uri, process))) if process.date
    end
  end

  # Adds a node that holds the name of the process to the graph.
  #
  # Returns the URI of the added node, so that the caller can link to it.
  def serialize_process_name(graph, document_uri, content_uri, process_uri, process)
    kind_uri = process_uri(process, :name)
    graph.insert(RDF::Statement.new(kind_uri, RDF.type, BioInterchange::SIO.name))
    graph.insert(RDF::Statement.new(kind_uri, BioInterchange::SIO.has_attribute, RDF::Literal.new("#{process.name}")))
    kind_uri
  end

  # Adds a node that holds the URI of the process to the graph.
  #
  # Returns the URI of the added node, so that the caller can link to it.
  def serialize_process_uri(graph, document_uri, content_uri, process_uri, process)
    kind_uri = process_uri(process, :uri)
    graph.insert(RDF::Statement.new(kind_uri, RDF.type, BioInterchange::SIO.software_entity))
    graph.insert(RDF::Statement.new(kind_uri, BioInterchange::SIO.has_attribute, RDF::URI.new(process.uri)))
    kind_uri
  end

  # Adds a node that holds the date of the process to the graph. The date
  # is parsed via +Date.parse+.
  #
  # Returns the URI of the added node, so that the caller can link to it.
  def serialize_process_date(graph, document_uri, content_uri, process_uri, process)
    kind_uri = process_uri(process, :date)
    graph.insert(RDF::Statement.new(kind_uri, RDF::DC.date, RDF::Literal.new(Date.parse(process.date))))
    kind_uri
  end

  # Adds a node that holds the start position (offset) of the content to the graph.
  #
  # Returns the URI of the added node, so that the caller can link to it.
  def serialize_content_start(graph, document_uri, content_uri, content)
    kind_uri = content_uri(content, :start)
    graph.insert(RDF::Statement.new(kind_uri, RDF.type, BioInterchange::SIO.start_position))
    graph.insert(RDF::Statement.new(kind_uri, BioInterchange::SIO.has_attribute, RDF::Literal.new(content.offset)))
    kind_uri
  end

  # Adds a node that holds the stop position (offset plus length) of the content to the graph.
  #
  # Returns the URI of the added node, so that the caller can link to it.
  def serialize_content_stop(graph, document_uri, content_uri, content)
    kind_uri = content_uri(content, :stop)
    graph.insert(RDF::Statement.new(kind_uri, RDF.type, BioInterchange::SIO.stop_position))
    # NOTE(review): the stop position is written as a string literal, whereas the start position is written as a number -- confirm whether that is intended.
    graph.insert(RDF::Statement.new(kind_uri, BioInterchange::SIO.has_attribute, RDF::Literal.new((content.offset+content.length).to_s)))
    kind_uri
  end

end
195
+
196
+ end
197
+
@@ -0,0 +1,41 @@
1
+ module BioInterchange::TextMining
2
+
3
# Base class of the text-mining readers, which keeps track of the process
# that generated the data that is being read.
class TMReader < BioInterchange::Reader

  # Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
  #
  # +name+:: Name of the process which generated this data
  # +name_uri+:: URI of the resource that generated this data
  # +date+:: Optional date of data creation
  # +processtype+:: Type of process that created this content
  # +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
  def initialize(name, name_uri, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
    process_metadata = { BioInterchange::TextMining::Process::VERSION => version }
    @process = BioInterchange::TextMining::Process.new(name, name_uri, processtype, process_metadata, date)
  end

  # Checks that the given input is an IO instance. Subclasses override this
  # method in order to read the input stream and return the associated model.
  #
  # +inputstream+:: Input IO stream to deserialize
  #
  # Raises +BioInterchange::Exceptions::ImplementationReaderError+ if +inputstream+ is not an IO.
  def deserialize(inputstream)
    return if inputstream.kind_of?(IO)

    raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.'
  end

  # Automatically tries to determine a suitable process from the given name ID, which is assumed
  # to be either an email address or web-site.
  #
  # +name_id+:: name ID that we want to represent by a suitable process
  #
  # Returns one of the type constants of +BioInterchange::TextMining::Process+;
  # a name ID that looks like both an URL and an email address counts as software.
  def self.determine_process(name_id)
    looks_like_email = name_id.match(/[^@]+@[^@]+/)
    looks_like_url = name_id.match(/[a-zA-Z]+:\/\//)

    if looks_like_url
      BioInterchange::TextMining::Process::SOFTWARE
    elsif looks_like_email
      BioInterchange::TextMining::Process::MANUAL
    else
      BioInterchange::TextMining::Process::UNSPECIFIED
    end
  end

end
40
+
41
+ end
@@ -0,0 +1,23 @@
1
+ module BioInterchange
2
+
3
# Abstract base class of writers that serialize object models to an output stream.
class Writer

  # Creates a new instance of a writer that will use the provided output stream to serialize object model instances.
  #
  # +ostream+:: instance of IO or derivative class
  #
  # Raises +BioInterchange::Exceptions::ImplementationWriterError+ if +ostream+ is not an IO.
  def initialize(ostream)
    unless ostream.kind_of?(IO)
      raise BioInterchange::Exceptions::ImplementationWriterError, 'The output stream is not an instance of IO or its subclasses.'
    end

    @ostream = ostream
  end

  # Serializes an object model instance. This implementation always raises
  # +BioInterchange::Exceptions::ImplementationWriterError+, since subclasses
  # have to provide the actual serialization.
  #
  # +model+:: an object model instance
  def serialize(model)
    message = 'You must implement this method, which takes an object model and serializes it into the previously provided output stream.'
    raise BioInterchange::Exceptions::ImplementationWriterError, message
  end

end
21
+
22
+ end
23
+
@@ -0,0 +1,3 @@
1
+
2
+ require 'biointerchange/core'
3
+
@@ -0,0 +1,27 @@
1
+
2
+ require 'rspec'
3
+
4
+ load 'lib/biointerchange/core.rb'
5
+ load 'lib/biointerchange/exceptions.rb'
6
+
7
# Checks that every BioInterchange error class can be raised and that it is
# rescued as an instance of its expected ancestor class.
describe BioInterchange::Exceptions do
  describe 'error and exception creation tests' do
    # Pairs of error class name and the name of the ancestor it has to match.
    [
      ['InputFormatError', 'BioInterchangeError'],
      ['ImplementationReaderError', 'ImplementationError'],
      ['ImplementationModelError', 'ImplementationError'],
      ['ImplementationWriterError', 'ImplementationError']
    ].each do |error_name, ancestor_name|
      it 'raising of ' + error_name do
        error = BioInterchange::Exceptions.const_get(error_name)
        ancestor = BioInterchange::Exceptions.const_get(ancestor_name)
        expect { raise error }.to raise_error(ancestor)
      end
    end
  end
end
27
+