biointerchange 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +49 -4
- data/VERSION +1 -1
- data/examples/chromosome_BF.gff +1701 -0
- data/examples/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf +4326 -0
- data/examples/pubannotation.10096561.json +1 -0
- data/examples/{pubannotation.json → pubannotation.10096561.json.old} +0 -0
- data/examples/pubannotation.2626671.json +1 -0
- data/lib/biointerchange/core.rb +58 -16
- data/lib/biointerchange/genomics/gff3_feature.rb +1 -0
- data/lib/biointerchange/genomics/gff3_feature_set.rb +31 -1
- data/lib/biointerchange/genomics/gff3_pragmas.rb +35 -0
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +60 -23
- data/lib/biointerchange/genomics/gff3_reader.rb +74 -40
- data/lib/biointerchange/genomics/gvf_feature.rb +24 -0
- data/lib/biointerchange/genomics/gvf_feature_set.rb +14 -0
- data/lib/biointerchange/genomics/gvf_pragmas.rb +6 -0
- data/lib/biointerchange/genomics/gvf_reader.rb +37 -0
- data/lib/biointerchange/gff3o.rb +1 -1
- data/lib/biointerchange/gvf1o.rb +145 -17
- data/lib/biointerchange/textmining/content.rb +1 -0
- data/lib/biointerchange/textmining/content_connection.rb +74 -0
- data/lib/biointerchange/textmining/document.rb +3 -1
- data/lib/biointerchange/textmining/pubannos_json_reader.rb +87 -9
- data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +58 -2
- data/spec/gff3_rdfwriter_spec.rb +9 -1
- data/spec/gvf_rdfwriter_spec.rb +81 -0
- data/spec/text_mining_pubannos_json_reader_spec.rb +82 -10
- data/spec/text_mining_rdfwriter_spec.rb +11 -0
- data/web/api.html +30 -23
- metadata +156 -138
@@ -21,7 +21,9 @@ class Document
|
|
21
21
|
#
|
22
22
|
# +content+:: content of type +BioInterchange::TextMining::Content+ that should be added to the document
|
23
23
|
def add(content)
|
24
|
-
|
24
|
+
if ( (! content.kind_of?(BioInterchange::TextMining::Content)) && (! content.kind_of?(BioInterchange::TextMining::ContentConnection)) )
|
25
|
+
raise BioInterchange::Exceptions::ImplementationModelError, 'Content has to be of kind BioInterchange::TextMining::Content or kind BioInterchange::TextMining::ContentConnection'
|
26
|
+
end
|
25
27
|
@content << content
|
26
28
|
end
|
27
29
|
|
@@ -27,7 +27,7 @@ private
|
|
27
27
|
raise BioInterchange::Exceptions::InputFormatError, 'Error parsing the JSON input file: #{result["Error"]}'
|
28
28
|
end
|
29
29
|
|
30
|
-
|
30
|
+
|
31
31
|
text = result['text']
|
32
32
|
#doc_uri = "http://pubannotation.dbcls.jp/pmdocs/" + result['pmid'].to_s
|
33
33
|
doc_uri = result['docurl']
|
@@ -39,18 +39,25 @@ private
|
|
39
39
|
|
40
40
|
#so our document requires content of type document or abstract
|
41
41
|
#should it hold the content string?
|
42
|
-
|
42
|
+
|
43
|
+
#hash to remember annotations by id in case later annotations need to build upon them
|
44
|
+
contents = {}
|
45
|
+
|
43
46
|
if result['catanns']
|
44
47
|
result['catanns'].each do |annot|
|
45
|
-
start_offset =
|
46
|
-
end_offset =
|
48
|
+
start_offset = 0
|
49
|
+
end_offset = 0
|
50
|
+
if annot['span']
|
51
|
+
start_offset = annot['span']['begin']
|
52
|
+
end_offset = annot['span']['end']
|
53
|
+
elsif annot['begin'] and annot['end']
|
54
|
+
start_offset = annot['begin']
|
55
|
+
end_offset = annot['end']
|
56
|
+
end
|
47
57
|
length = end_offset - start_offset
|
48
|
-
|
49
|
-
updated_time = annot['updated_at']
|
58
|
+
|
50
59
|
category = annot['category']
|
51
|
-
|
52
|
-
#doc_id = annot['doc_id']
|
53
|
-
#id = annot['id']
|
60
|
+
id = annot['id']
|
54
61
|
|
55
62
|
entity = text.slice(start_offset..end_offset)
|
56
63
|
|
@@ -58,11 +65,82 @@ private
|
|
58
65
|
con = Content.new(start_offset, length, Content::PHRASE, @process)
|
59
66
|
con.setContext(doc)
|
60
67
|
doc.add(con)
|
68
|
+
|
69
|
+
contents[id] = con
|
61
70
|
|
62
71
|
#set process.date = updated_time?
|
63
72
|
end
|
64
73
|
end
|
65
74
|
|
75
|
+
if result['insanns']
|
76
|
+
result['insanns'].each do |annot|
|
77
|
+
|
78
|
+
#unsure what to do about this (con1), 'E1' is the ID of something not created yet.
|
79
|
+
#it is perhaps a case of making a new content, but with what params...?
|
80
|
+
#need to confirm what this is referring to with JDK
|
81
|
+
con1 = nil
|
82
|
+
con2 = contents[annot['object']]
|
83
|
+
|
84
|
+
#get annotation type
|
85
|
+
type = ContentConnection::UNSPECIFIED
|
86
|
+
case annot['type']
|
87
|
+
when 'subClassOf'
|
88
|
+
type = ContentConnection::SUBCLASS
|
89
|
+
end
|
90
|
+
connection = ContentConnection.new(con1, con2, type, @process)
|
91
|
+
connection.setContext(doc)
|
92
|
+
doc.add(connection)
|
93
|
+
|
94
|
+
contents[annot['id']] = connection
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
if result['relanns']
|
100
|
+
result['relanns'].each do |annot|
|
101
|
+
con1 = contents[annot['subject']]
|
102
|
+
con2 = contents[annot['object']]
|
103
|
+
|
104
|
+
#get annotation type
|
105
|
+
type = ContentConnection::UNSPECIFIED
|
106
|
+
case annot['type']
|
107
|
+
when 'equivalentTo'
|
108
|
+
type = ContentConnection::EQUIVALENCE
|
109
|
+
when 'themeOf'
|
110
|
+
type = ContentConnection::THEME
|
111
|
+
end
|
112
|
+
connection = ContentConnection.new(con1, con2, type, @process)
|
113
|
+
connection.setContext(doc)
|
114
|
+
doc.add(connection)
|
115
|
+
|
116
|
+
contents[annot['id']] = connection
|
117
|
+
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
if result['modanns']
|
122
|
+
result['modanns'].each do |annot|
|
123
|
+
|
124
|
+
#in this case, it is a modification of an already existing content object (speculation/negation).
|
125
|
+
con = contents[annot['object']]
|
126
|
+
|
127
|
+
#get annotation type
|
128
|
+
type = ContentConnection::UNSPECIFIED
|
129
|
+
case annot['type']
|
130
|
+
when 'Speculation'
|
131
|
+
type = ContentConnection::SPECULATION
|
132
|
+
when 'Negation'
|
133
|
+
type = ContentConnection::NEGATION
|
134
|
+
end
|
135
|
+
connection = ContentConnection.new(con, nil, type, @process)
|
136
|
+
connection.setContext(doc)
|
137
|
+
doc.add(connection)
|
138
|
+
|
139
|
+
contents[annot['id']] = connection
|
140
|
+
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
66
144
|
doc
|
67
145
|
end
|
68
146
|
|
@@ -10,7 +10,7 @@ class RDFWriter < BioInterchange::Writer
|
|
10
10
|
#
|
11
11
|
# +ostream+:: instance of an IO class or derivative that is used for RDF serialization
|
12
12
|
def initialize(ostream)
|
13
|
-
raise BioInterchange::Exceptions::ImplementationWriterError, 'The output stream is not an instance of IO or its subclasses.' unless ostream.kind_of?(IO)
|
13
|
+
raise BioInterchange::Exceptions::ImplementationWriterError, 'The output stream is not an instance of IO or its subclasses.' unless ostream.kind_of?(IO) || ostream.kind_of?(StringIO)
|
14
14
|
@ostream = ostream
|
15
15
|
end
|
16
16
|
|
@@ -68,6 +68,22 @@ private
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
+
# Generates a URI for a given content connection and its contents.
|
72
|
+
#
|
73
|
+
# +contentcon+:: content connection instance
|
74
|
+
# +kind+:: kind of the URI that should be generated, for example, whether the URI should represent the name, date, etc.
|
75
|
+
def content_connection_uri(contentcon, kind)
|
76
|
+
base_uri = 'biointerchange://textmining/content_connection'
|
77
|
+
case kind
|
78
|
+
when :start
|
79
|
+
RDF::URI.new("#{base_uri}/start/#{content.uri.sub(/^.*?:\/\//, '')}")
|
80
|
+
when :stop
|
81
|
+
RDF::URI.new("#{base_uri}/stop/#{content.uri.sub(/^.*?:\/\//, '')}")
|
82
|
+
else
|
83
|
+
raise BioInterchange::Exceptions::ImplementationWriterError, "There is no implementation for serializing a content as #{kind}."
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
71
87
|
# Serializes RDF for a textual document representation using the Semanticscience Integrated Ontology
|
72
88
|
# (http://code.google.com/p/semanticscience/wiki/SIO).
|
73
89
|
#
|
@@ -77,7 +93,13 @@ private
|
|
77
93
|
document_uri = RDF::URI.new(model.uri)
|
78
94
|
graph.insert(RDF::Statement.new(document_uri, RDF.type, BioInterchange::SIO.document))
|
79
95
|
model.contents.each { |content|
|
80
|
-
|
96
|
+
if content.kind_of?(BioInterchange::TextMining::Content)
|
97
|
+
serialize_content(graph, document_uri, content)
|
98
|
+
elsif content.kind_of?(BioInterchange::TextMining::ContentConnection)
|
99
|
+
serialize_contentconnection(graph, document_uri, content)
|
100
|
+
else
|
101
|
+
raise BioInterchange::Exceptions::ImplementationWriterError, "Can only serialize Content and ContentConnection from a Document."
|
102
|
+
end
|
81
103
|
}
|
82
104
|
RDF::NTriples::Writer.dump(graph, @ostream)
|
83
105
|
end
|
@@ -128,6 +150,40 @@ private
|
|
128
150
|
|
129
151
|
end
|
130
152
|
|
153
|
+
# Serializes a ContentConnection object for a given document URI.
|
154
|
+
#
|
155
|
+
# +graph+:: RDF graph to which content is added
|
156
|
+
# +document_uri+:: the document URI to which the added content connection belongs
|
157
|
+
# +contentcon+:: an instance that describes the content connection
|
158
|
+
def serialize_contentconnection(graph, document_uri, contentcon)
|
159
|
+
contentcon_uri = RDF::URI.new(contentcon.uri)
|
160
|
+
graph.insert(RDF::Statement.new(document_uri, BioInterchange::SIO.has_attribute, contentcon_uri))
|
161
|
+
serialize_process(graph, document_uri, contentcon_uri, contentcon.process) if contentcon.process
|
162
|
+
|
163
|
+
|
164
|
+
#TODO these sio tags need confirming - they are here as an initial proof of concept
|
165
|
+
#next issue, some of these are relations and some are labels, need to separate out which
|
166
|
+
#I seem to recall that the only relationship types that should be used are "has_attribute" and "RDF::type", in which case these need adjusting for that.
|
167
|
+
#I presume this'd mean making a "has_attribute" link between the content1 and the contentconnection relationship in some way.
|
168
|
+
case contentcon.type
|
169
|
+
when ContentConnection::UNSPECIFIED
|
170
|
+
graph.insert(RDF::Statement.new(contentcon.content1.uri, BioInterchange::SIO.has_attribute, BioInterchange::SIO.language_entity))
|
171
|
+
when ContentConnection::EQUIVALENCE
|
172
|
+
graph.insert(RDF::Statement.new(contentcon.content1.uri, BioInterchange::SIO.is_equal_to, contentcon.content2.uri))
|
173
|
+
when ContentConnection::SUBCLASS
|
174
|
+
#TODO this class needs more information, the relationship is between a contentcon.content, and 'something'... I've yet to work out what
|
175
|
+
graph.insert(RDF::Statement.new(contentcon.content2.uri, BioInterchange::SIO.has_attribute, BioInterchange::SIO.in_relation_to))
|
176
|
+
when ContentConnection::THEME
|
177
|
+
#TODO there are other more specific options for this that need investigating as options.
|
178
|
+
graph.insert(RDF::Statement.new(contentcon.content1.uri, BioInterchange::SIO.has_target, contentcon.content2.uri))
|
179
|
+
when ContentConnection::SPECULATION
|
180
|
+
graph.insert(RDF::Statement.new(contentcon.content1.uri, BioInterchange::SIO.has_attribute, BioInterchange::SIO.speculation))
|
181
|
+
when ContentConnection::NEGATION
|
182
|
+
graph.insert(RDF::Statement.new(contentcon.content1.uri, BioInterchange::SIO.denotes, BioInterchange::SIO.negative_regulation))
|
183
|
+
end
|
184
|
+
|
185
|
+
end
|
186
|
+
|
131
187
|
# Serializes a process object for a specific document uri
|
132
188
|
#
|
133
189
|
#
|
data/spec/gff3_rdfwriter_spec.rb
CHANGED
@@ -66,7 +66,15 @@ describe BioInterchange::Genomics::RDFWriter do
|
|
66
66
|
set.add(feature)
|
67
67
|
BioInterchange::Genomics::RDFWriter.new(ostream).serialize(set)
|
68
68
|
ostream.close
|
69
|
-
istream.read.lines
|
69
|
+
lines = istream.read.lines
|
70
|
+
feature_no = 0
|
71
|
+
lines.each { |line|
|
72
|
+
subject, predicate, object = line.chomp.split(/\s/, 3)
|
73
|
+
object.sub!(/\s+\.$/, '')
|
74
|
+
feature_no += 1 if predicate == "<#{RDF.type}>" and object == "<#{BioInterchange::GFF3O.Feature}>"
|
75
|
+
}
|
76
|
+
lines.count.should be == 43
|
77
|
+
feature_no.should be == 3
|
70
78
|
end
|
71
79
|
end
|
72
80
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
# Turn off verbose reporting here, since class definitions may be loaded multiple
|
6
|
+
# times here. That reports that constants have already been initialized, which
|
7
|
+
# is true, but they are only "re-initialized" with the very same values.
|
8
|
+
v, $VERBOSE = $VERBOSE, nil
|
9
|
+
load 'lib/biointerchange/core.rb'
|
10
|
+
load 'lib/biointerchange/gvf1o.rb'
|
11
|
+
load 'lib/biointerchange/sofa.rb'
|
12
|
+
load 'lib/biointerchange/reader.rb'
|
13
|
+
load 'lib/biointerchange/writer.rb'
|
14
|
+
load 'lib/biointerchange/genomics/gvf_feature_set.rb'
|
15
|
+
load 'lib/biointerchange/genomics/gvf_feature.rb'
|
16
|
+
# The GVF implementation extends the GFF3 implementation, so load those classes too:
|
17
|
+
load 'lib/biointerchange/genomics/gff3_rdf_ntriples.rb'
|
18
|
+
load 'lib/biointerchange/genomics/gff3_feature_set.rb'
|
19
|
+
load 'lib/biointerchange/genomics/gff3_feature.rb'
|
20
|
+
$VERBOSE = v
|
21
|
+
|
22
|
+
describe BioInterchange::Genomics::RDFWriter do
|
23
|
+
describe 'serialization of GVF models' do
|
24
|
+
it 'empty document' do
|
25
|
+
istream, ostream = IO.pipe
|
26
|
+
BioInterchange::Genomics::RDFWriter.new(ostream).serialize(BioInterchange::Genomics::GVFFeatureSet.new())
|
27
|
+
ostream.close
|
28
|
+
istream.read.lines.count.should eq(1)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'model with three features' do
|
32
|
+
istream, ostream = IO.pipe
|
33
|
+
set = BioInterchange::Genomics::GVFFeatureSet.new()
|
34
|
+
feature = BioInterchange::Genomics::GVFFeature.new(
|
35
|
+
'GRCh37.1',
|
36
|
+
'NCBI',
|
37
|
+
BioInterchange::SOFA.CDS,
|
38
|
+
32890598,
|
39
|
+
32890664,
|
40
|
+
0.1,
|
41
|
+
BioInterchange::Genomics::GFF3Feature::POSITIVE,
|
42
|
+
{ 'ID' => [ 'BRCA2' ], 'annotation' => [ 'manual' ] }
|
43
|
+
)
|
44
|
+
set.add(feature)
|
45
|
+
feature = BioInterchange::Genomics::GVFFeature.new(
|
46
|
+
'GRCh37.1',
|
47
|
+
'NCBI',
|
48
|
+
BioInterchange::SOFA.modified_base,
|
49
|
+
32890599,
|
50
|
+
32890599,
|
51
|
+
0.8,
|
52
|
+
BioInterchange::Genomics::GFF3Feature::POSITIVE,
|
53
|
+
{ 'ID' => [ 'aModifiedBase' ], 'Parent' => [ 'BRCA2' ] }
|
54
|
+
)
|
55
|
+
set.add(feature)
|
56
|
+
feature = BioInterchange::Genomics::GVFFeature.new(
|
57
|
+
'GRCh37.1',
|
58
|
+
'NCBI',
|
59
|
+
BioInterchange::SOFA.modified_base,
|
60
|
+
32890599,
|
61
|
+
32890599,
|
62
|
+
0.8,
|
63
|
+
BioInterchange::Genomics::GFF3Feature::POSITIVE,
|
64
|
+
{ 'Parent' => [ 'BRCA2', 'aModifiedBase' ] }
|
65
|
+
)
|
66
|
+
set.add(feature)
|
67
|
+
BioInterchange::Genomics::RDFWriter.new(ostream).serialize(set)
|
68
|
+
ostream.close
|
69
|
+
lines = istream.read.lines
|
70
|
+
feature_no = 0
|
71
|
+
lines.each { |line|
|
72
|
+
subject, predicate, object = line.chomp.split(/\s/, 3)
|
73
|
+
object.sub!(/\s+\.$/, '')
|
74
|
+
feature_no += 1 if predicate == "<#{RDF.type}>" and object == "<#{BioInterchange::GVF1O.Feature}>"
|
75
|
+
}
|
76
|
+
lines.count.should be == 43
|
77
|
+
feature_no.should be == 3
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
@@ -12,6 +12,7 @@ load 'lib/biointerchange/textmining/text_mining_reader.rb'
|
|
12
12
|
load 'lib/biointerchange/textmining/pubannos_json_reader.rb'
|
13
13
|
load 'lib/biointerchange/textmining/document.rb'
|
14
14
|
load 'lib/biointerchange/textmining/content.rb'
|
15
|
+
load 'lib/biointerchange/textmining/content_connection.rb'
|
15
16
|
load 'lib/biointerchange/textmining/process.rb'
|
16
17
|
$VERBOSE = v
|
17
18
|
|
@@ -28,23 +29,55 @@ describe BioInterchange::TextMining::PubannosJsonReader do
|
|
28
29
|
model.should be_an_instance_of BioInterchange::TextMining::Document
|
29
30
|
end
|
30
31
|
it 'read json from file' do
|
31
|
-
model = @reader.deserialize(File.new('examples/pubannotation.json'))
|
32
|
+
model = @reader.deserialize(File.new('examples/pubannotation.10096561.json'))
|
32
33
|
|
33
34
|
model.should be_an_instance_of BioInterchange::TextMining::Document
|
34
35
|
end
|
36
|
+
|
37
|
+
it 'read old json from file' do
|
38
|
+
model = @reader.deserialize(File.new('examples/pubannotation.10096561.json.old'))
|
39
|
+
|
40
|
+
model.should be_an_instance_of BioInterchange::TextMining::Document
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
describe 'old json generated model checks' do
|
45
|
+
before :all do
|
46
|
+
reader = BioInterchange::TextMining::PubannosJsonReader.new("TestOld", "http://test.com", "00-00-0000", BioInterchange::TextMining::Process::UNSPECIFIED, "0.0")
|
47
|
+
|
48
|
+
@model = reader.deserialize('{ "name": "Peter Smith", "name_id": "<peter.smith@example.json>", "date": "2012-08-12", "version": "3", "docurl":"http://example.org/example_json", "text":"Some document text. With two annotations of type protein.\n", "catanns":[{"annset_id":1,"begin":0,"category":"Protein","doc_id":9,"end":10,"id":139},{"annset_id":1,"begin":20,"category":"Protein","doc_id":9,"end":42,"id":138}]}')
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'model is of type document' do
|
53
|
+
@model.should be_an_instance_of BioInterchange::TextMining::Document
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'document uri (job id read)' do
|
57
|
+
@model.uri.should eql "http://example.org/example_json"
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'document has content' do
|
61
|
+
@model.contents.size.should eql 3
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'document document' do
|
65
|
+
@model.contents[0].type.should eql BioInterchange::TextMining::Content::DOCUMENT and @model.contents[0].offset.should eql 0 and @model.contents[0].length.should eql 58
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'document phrase' do
|
69
|
+
@model.contents[1].type.should eql BioInterchange::TextMining::Content::PHRASE and @model.contents[1].offset.should eql 0 and @model.contents[1].length.should eql 10 and
|
70
|
+
|
71
|
+
@model.contents[2].type.should eql BioInterchange::TextMining::Content::PHRASE and @model.contents[2].offset.should eql 20 and @model.contents[2].length.should eql 22
|
72
|
+
end
|
35
73
|
end
|
36
74
|
|
37
|
-
describe 'generated model
|
75
|
+
describe 'basic generated model checks' do
|
38
76
|
|
39
77
|
before :all do
|
40
78
|
reader = BioInterchange::TextMining::PubannosJsonReader.new("Test", "http://test.com", "00-00-0000", BioInterchange::TextMining::Process::UNSPECIFIED, "0.0")
|
41
79
|
|
42
|
-
@model = reader.deserialize('{ "name": "Peter Smith", "name_id": "<peter.smith@example.json>", "date": "2012-08
|
43
|
-
|
44
|
-
#puts "Document Model: #{@model.uri}"
|
45
|
-
# @model.contents.each do |c|
|
46
|
-
# puts "\tContent: #{c.type}, #{c.offset}, #{c.length}"
|
47
|
-
#end
|
80
|
+
@model = reader.deserialize('{ "name": "Peter Smith", "name_id": "<peter.smith@example.json>", "date": "2012-12-08", "version": "3", "docurl":"http://example.org/example_json", "text":"Some document text. With two annotations of type protein.\n", "catanns":[{"id":"T1","span":{"begin":0,"end":10},"category":"NP"},{"id":"T2","span":{"begin":20,"end":42},"category":"NP"}]}')
|
48
81
|
end
|
49
82
|
|
50
83
|
it 'model is of type document' do
|
@@ -65,10 +98,49 @@ describe BioInterchange::TextMining::PubannosJsonReader do
|
|
65
98
|
|
66
99
|
it 'document phrase' do
|
67
100
|
@model.contents[1].type.should eql BioInterchange::TextMining::Content::PHRASE and @model.contents[1].offset.should eql 0 and @model.contents[1].length.should eql 10 and
|
68
|
-
|
69
101
|
@model.contents[2].type.should eql BioInterchange::TextMining::Content::PHRASE and @model.contents[2].offset.should eql 20 and @model.contents[2].length.should eql 22
|
70
102
|
end
|
71
|
-
|
103
|
+
end
|
104
|
+
|
105
|
+
describe 'advanced generated model checks' do
|
106
|
+
|
107
|
+
before :all do
|
108
|
+
reader = BioInterchange::TextMining::PubannosJsonReader.new("Test", "http://test.com", "00-00-0000", BioInterchange::TextMining::Process::UNSPECIFIED, "0.0")
|
109
|
+
|
110
|
+
@model = reader.deserialize(File.new('examples/pubannotation.2626671.json'))
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'model is of type document' do
|
114
|
+
@model.should be_an_instance_of BioInterchange::TextMining::Document
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'document uri (job id read)' do
|
118
|
+
@model.uri.should eql "http://www.ncbi.nlm.nih.gov/pubmed/2626671"
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'document has content' do
|
122
|
+
@model.contents.size.should eql 91
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'document document' do
|
126
|
+
#range as exact length seems to depend on encoding used...
|
127
|
+
@model.contents[0].type.should eql BioInterchange::TextMining::Content::DOCUMENT and @model.contents[0].offset.should eql 0 and ( @model.contents[0].length.should > 2350 or @model.contents[0].length.should < 2360 )
|
128
|
+
end
|
129
|
+
|
130
|
+
it 'document content types and interconnections' do
|
131
|
+
doc = 1
|
132
|
+
sub = 39
|
133
|
+
eq = 62
|
134
|
+
th = 64
|
135
|
+
spec = 87
|
136
|
+
neg = 89
|
137
|
+
@model.contents[doc].type.should eql BioInterchange::TextMining::Content::PHRASE and @model.contents[doc].offset.should eql 9 and @model.contents[doc].length.should eql 10 and
|
138
|
+
@model.contents[sub].type.should eql BioInterchange::TextMining::ContentConnection::SUBCLASS and @model.contents[sub].content1.should eql nil and @model.contents[sub].content2.offset.should eql 9 and
|
139
|
+
@model.contents[eq].type.should eql BioInterchange::TextMining::ContentConnection::EQUIVALENCE and @model.contents[eq].content1.offset.should eql 396 and @model.contents[eq].content2.offset.should eql 386 and
|
140
|
+
@model.contents[th].type.should eql BioInterchange::TextMining::ContentConnection::THEME and @model.contents[th].content1.offset.should eql 32 and @model.contents[th].content2.content2.offset.should eql 9 and
|
141
|
+
@model.contents[spec].type.should eql BioInterchange::TextMining::ContentConnection::SPECULATION and @model.contents[spec].content1.content2.offset.should eql 9 and @model.contents[spec].content2.should eql nil and
|
142
|
+
@model.contents[neg].type.should eql BioInterchange::TextMining::ContentConnection::NEGATION and @model.contents[neg].content1.content2.offset.should eql 426 and @model.contents[neg].content2.should eql nil
|
143
|
+
end
|
72
144
|
end
|
73
145
|
|
74
146
|
end
|
@@ -58,6 +58,17 @@ describe BioInterchange::TextMining::RDFWriter do
|
|
58
58
|
ostream.close
|
59
59
|
istream.read.lines.count.should be > 1
|
60
60
|
end
|
61
|
+
|
62
|
+
it 'full advanced json document' do
|
63
|
+
ostream = StringIO.new
|
64
|
+
reader = BioInterchange::TextMining::PubannosJsonReader.new("Test", "http://test.com", "2012-12-09", BioInterchange::TextMining::Process::UNSPECIFIED, "0.0")
|
65
|
+
|
66
|
+
model = reader.deserialize(File.new('examples/pubannotation.2626671.json'))
|
67
|
+
|
68
|
+
BioInterchange::TextMining::RDFWriter.new(ostream).serialize(model)
|
69
|
+
ostream.close_write
|
70
|
+
ostream.string.lines.count.should > 100
|
71
|
+
end
|
61
72
|
end
|
62
73
|
end
|
63
74
|
|