biointerchange 0.2.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/README.md +269 -19
- data/VERSION +1 -1
- data/examples/bininda_emonds_mammals.new +1 -0
- data/examples/rdfization.rb +17 -0
- data/examples/tree1.new +1 -0
- data/examples/tree2.new +1 -0
- data/examples/vocabulary.rb +26 -5
- data/generators/javaify.rb +12 -18
- data/generators/make_supplement_releases.rb +2 -0
- data/generators/pythonify.rb +21 -8
- data/generators/rdfxml.rb +15 -1
- data/lib/biointerchange/cdao.rb +2014 -0
- data/lib/biointerchange/core.rb +70 -77
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
- data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
- data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
- data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
- data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
- data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
- data/lib/biointerchange/registry.rb +50 -8
- data/lib/biointerchange/so.rb +150 -0
- data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
- data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
- data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
- data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
- data/spec/phylogenetics_spec.rb +79 -0
- data/supplemental/java/biointerchange/pom.xml +1 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
- data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
- data/supplemental/python/biointerchange/cdao.py +2021 -0
- data/supplemental/python/biointerchange/faldo.py +37 -38
- data/supplemental/python/biointerchange/gff3o.py +156 -157
- data/supplemental/python/biointerchange/goxref.py +172 -172
- data/supplemental/python/biointerchange/gvf1o.py +428 -429
- data/supplemental/python/biointerchange/sio.py +3133 -3134
- data/supplemental/python/biointerchange/so.py +6626 -6527
- data/supplemental/python/biointerchange/sofa.py +790 -791
- data/supplemental/python/example.py +23 -5
- data/supplemental/python/setup.py +2 -2
- data/web/about.html +1 -0
- data/web/api.html +223 -15
- data/web/biointerchange.js +27 -6
- data/web/cli.html +8 -3
- data/web/index.html +6 -2
- data/web/ontologies.html +3 -0
- data/web/service/rdfizer.fcgi +7 -15
- data/web/webservices.html +6 -2
- metadata +30 -3
data/lib/biointerchange/so.rb
CHANGED
@@ -13114,6 +13114,156 @@ class SO
|
|
13114
13114
|
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/SEQUENCE_variant_of') then
|
13115
13115
|
return true
|
13116
13116
|
end
|
13117
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/adjacent_to') then
|
13118
|
+
return true
|
13119
|
+
end
|
13120
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/associated_with') then
|
13121
|
+
return true
|
13122
|
+
end
|
13123
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/complete_evidence_for_feature') then
|
13124
|
+
return true
|
13125
|
+
end
|
13126
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/connects_on') then
|
13127
|
+
return true
|
13128
|
+
end
|
13129
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contained_by') then
|
13130
|
+
return true
|
13131
|
+
end
|
13132
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contains') then
|
13133
|
+
return true
|
13134
|
+
end
|
13135
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/derives_from') then
|
13136
|
+
return true
|
13137
|
+
end
|
13138
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/disconnected_from') then
|
13139
|
+
return true
|
13140
|
+
end
|
13141
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_from') then
|
13142
|
+
return true
|
13143
|
+
end
|
13144
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_to') then
|
13145
|
+
return true
|
13146
|
+
end
|
13147
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/evidence_for_feature') then
|
13148
|
+
return true
|
13149
|
+
end
|
13150
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/exemplar_of') then
|
13151
|
+
return true
|
13152
|
+
end
|
13153
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finished_by') then
|
13154
|
+
return true
|
13155
|
+
end
|
13156
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finishes') then
|
13157
|
+
return true
|
13158
|
+
end
|
13159
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/gained') then
|
13160
|
+
return true
|
13161
|
+
end
|
13162
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/genome_of') then
|
13163
|
+
return true
|
13164
|
+
end
|
13165
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guided_by') then
|
13166
|
+
return true
|
13167
|
+
end
|
13168
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guides') then
|
13169
|
+
return true
|
13170
|
+
end
|
13171
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_integral_part') then
|
13172
|
+
return true
|
13173
|
+
end
|
13174
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_origin') then
|
13175
|
+
return true
|
13176
|
+
end
|
13177
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_part') then
|
13178
|
+
return true
|
13179
|
+
end
|
13180
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_quality') then
|
13181
|
+
return true
|
13182
|
+
end
|
13183
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/homologous_to') then
|
13184
|
+
return true
|
13185
|
+
end
|
13186
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/integral_part_of') then
|
13187
|
+
return true
|
13188
|
+
end
|
13189
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/is_consecutive_sequence_of') then
|
13190
|
+
return true
|
13191
|
+
end
|
13192
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/lost') then
|
13193
|
+
return true
|
13194
|
+
end
|
13195
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/maximally_overlaps') then
|
13196
|
+
return true
|
13197
|
+
end
|
13198
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/member_of') then
|
13199
|
+
return true
|
13200
|
+
end
|
13201
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/non_functional_homolog_of') then
|
13202
|
+
return true
|
13203
|
+
end
|
13204
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/orthologous_to') then
|
13205
|
+
return true
|
13206
|
+
end
|
13207
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/overlaps') then
|
13208
|
+
return true
|
13209
|
+
end
|
13210
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/paralogous_to') then
|
13211
|
+
return true
|
13212
|
+
end
|
13213
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/part_of') then
|
13214
|
+
return true
|
13215
|
+
end
|
13216
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/partial_evidence_for_feature') then
|
13217
|
+
return true
|
13218
|
+
end
|
13219
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/position_of') then
|
13220
|
+
return true
|
13221
|
+
end
|
13222
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_from') then
|
13223
|
+
return true
|
13224
|
+
end
|
13225
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_into') then
|
13226
|
+
return true
|
13227
|
+
end
|
13228
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_from') then
|
13229
|
+
return true
|
13230
|
+
end
|
13231
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_to') then
|
13232
|
+
return true
|
13233
|
+
end
|
13234
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/sequence_of') then
|
13235
|
+
return true
|
13236
|
+
end
|
13237
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/similar_to') then
|
13238
|
+
return true
|
13239
|
+
end
|
13240
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/started_by') then
|
13241
|
+
return true
|
13242
|
+
end
|
13243
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/starts') then
|
13244
|
+
return true
|
13245
|
+
end
|
13246
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_from') then
|
13247
|
+
return true
|
13248
|
+
end
|
13249
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_to') then
|
13250
|
+
return true
|
13251
|
+
end
|
13252
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_from') then
|
13253
|
+
return true
|
13254
|
+
end
|
13255
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_to') then
|
13256
|
+
return true
|
13257
|
+
end
|
13258
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translates_to') then
|
13259
|
+
return true
|
13260
|
+
end
|
13261
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translation_of') then
|
13262
|
+
return true
|
13263
|
+
end
|
13264
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/variant_of') then
|
13265
|
+
return true
|
13266
|
+
end
|
13117
13267
|
return false
|
13118
13268
|
end
|
13119
13269
|
|
@@ -5,6 +5,27 @@ require 'rexml/streamlistener'
|
|
5
5
|
|
6
6
|
class PDFxXMLReader < BioInterchange::TextMining::TMReader
|
7
7
|
|
8
|
+
# Register reader:
|
9
|
+
BioInterchange::Registry.register_reader(
|
10
|
+
'uk.ac.man.pdfx',
|
11
|
+
PDFxXMLReader,
|
12
|
+
[
|
13
|
+
'name',
|
14
|
+
'name_id',
|
15
|
+
'date',
|
16
|
+
[ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
|
17
|
+
'version'
|
18
|
+
],
|
19
|
+
false,
|
20
|
+
'PDFx XML reader',
|
21
|
+
[
|
22
|
+
[ 'date <date>', 'date when the PDFx XML file was created (optional)' ],
|
23
|
+
[ 'version <version>', 'version number of resource (optional)' ],
|
24
|
+
[ 'name <name>', 'name of the PDFx XML file creator (required)' ],
|
25
|
+
[ 'name_id <id>', 'email address of the PDFx XML file creator (required)' ]
|
26
|
+
]
|
27
|
+
)
|
28
|
+
|
8
29
|
# Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
|
9
30
|
#
|
10
31
|
# Presently I assume a single document per xml file,
|
@@ -14,8 +35,6 @@ class PDFxXMLReader < BioInterchange::TextMining::TMReader
|
|
14
35
|
#
|
15
36
|
# +inputstream+:: Input IO stream to deserialize
|
16
37
|
def deserialize(inputstream)
|
17
|
-
#super(inputstream)
|
18
|
-
|
19
38
|
raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.' unless inputstream.kind_of?(IO) or inputstream.kind_of?(String)
|
20
39
|
|
21
40
|
@input = inputstream
|
@@ -5,13 +5,36 @@ require 'json'
|
|
5
5
|
|
6
6
|
class PubAnnosJSONReader < BioInterchange::TextMining::TMReader
|
7
7
|
|
8
|
+
# Register reader:
|
9
|
+
BioInterchange::Registry.register_reader(
|
10
|
+
'dbcls.catanns.json',
|
11
|
+
PubAnnosJSONReader,
|
12
|
+
[
|
13
|
+
'name',
|
14
|
+
'name_id',
|
15
|
+
'date',
|
16
|
+
[ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
|
17
|
+
'version'
|
18
|
+
],
|
19
|
+
false,
|
20
|
+
'PubAnnotations JSON reader',
|
21
|
+
[
|
22
|
+
[ 'date <date>', 'date when the PubAnnotations JSON file was created (optional)' ],
|
23
|
+
[ 'version <version>', 'version number of resource (optional)' ],
|
24
|
+
[ 'name <name>', 'name of the PubAnnotations JSON file creator (required)' ],
|
25
|
+
[ 'name_id <id>', 'email address of the PubAnnotations JSON file creator (required)' ]
|
26
|
+
]
|
27
|
+
)
|
28
|
+
|
29
|
+
# Deserialize a PubAnnotations JSON object.
|
30
|
+
#
|
31
|
+
# +inputstream+:: Input IO stream to deserialize
|
8
32
|
def deserialize(inputstream)
|
9
33
|
if inputstream.kind_of?(IO) then
|
10
34
|
pubannos(inputstream.read)
|
11
35
|
elsif inputstream.kind_of?(String) then
|
12
36
|
pubannos(inputstream)
|
13
37
|
else
|
14
|
-
#else raise exception
|
15
38
|
super(inputstream)
|
16
39
|
end
|
17
40
|
end
|
@@ -6,6 +6,15 @@ module BioInterchange::TextMining
|
|
6
6
|
|
7
7
|
class RDFWriter < BioInterchange::Writer
|
8
8
|
|
9
|
+
# Register writers:
|
10
|
+
BioInterchange::Registry.register_writer(
|
11
|
+
'rdf.bh12.sio',
|
12
|
+
RDFWriter,
|
13
|
+
[ 'dbcls.catanns.json', 'uk.ac.man.pdfx' ],
|
14
|
+
false,
|
15
|
+
'Semanticscience Integrated Ontology (SIO) based text-mining RDFization'
|
16
|
+
)
|
17
|
+
|
9
18
|
# Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
|
10
19
|
#
|
11
20
|
# +ostream+:: instance of an IO class or derivative that is used for RDF serialization
|
@@ -5,16 +5,16 @@ class TMReader < BioInterchange::Reader
|
|
5
5
|
# Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
|
6
6
|
#
|
7
7
|
# +name+:: Name of the process which generated this data
|
8
|
-
# +
|
8
|
+
# +name_id+:: URI of the resource that generated this data
|
9
9
|
# +date+:: Optional date of data creation
|
10
10
|
# +processtype+:: Type of process that created this content
|
11
11
|
# +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
|
12
|
-
def initialize(name,
|
13
|
-
|
12
|
+
def initialize(name, name_id, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
|
13
|
+
raise ArgumentError, 'Require "name" and "name_id" options to specify source of annotations (e.g., a manual annotators name, or software tool name) and their associated URI (e.g., email address, or webaddress).' unless name and name_id
|
14
|
+
|
14
15
|
metadata = {}
|
15
16
|
metadata[BioInterchange::TextMining::Process::VERSION] = version
|
16
|
-
@process = BioInterchange::TextMining::Process.new(name,
|
17
|
-
|
17
|
+
@process = BioInterchange::TextMining::Process.new(name, name_id, processtype, metadata, date)
|
18
18
|
end
|
19
19
|
|
20
20
|
|
@@ -0,0 +1,79 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rspec'
|
4
|
+
require 'bio'
|
5
|
+
|
6
|
+
# Turn off verbose reporting here, since class definitions may be loaded multiple
|
7
|
+
# times here. That reports that constants have already been initialized, which
|
8
|
+
# is true, but they are only "re-initialized" with the very same values.
|
9
|
+
v, $VERBOSE = $VERBOSE, nil
|
10
|
+
load 'lib/biointerchange/core.rb'
|
11
|
+
load 'lib/biointerchange/cdao.rb'
|
12
|
+
load 'lib/biointerchange/reader.rb'
|
13
|
+
load 'lib/biointerchange/model.rb'
|
14
|
+
load 'lib/biointerchange/writer.rb'
|
15
|
+
load 'lib/biointerchange/phylogenetics/newick_reader.rb'
|
16
|
+
load 'lib/biointerchange/phylogenetics/tree_set.rb'
|
17
|
+
load 'lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
|
18
|
+
$VERBOSE = v
|
19
|
+
|
20
|
+
describe BioInterchange::Phylogenetics::NewickReader do
|
21
|
+
describe 'deserialization of Newick trees' do
|
22
|
+
it 'empty document' do
|
23
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new(''))
|
24
|
+
trees = 0
|
25
|
+
tree_file.each_entry { |tree| trees += 1 }
|
26
|
+
trees.should eq(0)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'single Newick tree' do
|
30
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new('(,,(,));'))
|
31
|
+
trees = 0
|
32
|
+
tree_file.each_entry { |newick_tree|
|
33
|
+
trees += 1
|
34
|
+
newick_tree.tree.edges.length.should eq(5)
|
35
|
+
newick_tree.tree.nodes.length.should eq(6)
|
36
|
+
}
|
37
|
+
trees.should eq(1)
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'three Newick trees' do
|
41
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new("(,,(,));\n(A,B,(C,D));\n(A,B,(C,D)E)F;"))
|
42
|
+
trees = 0
|
43
|
+
tree_file.each_entry { |newick_tree|
|
44
|
+
trees += 1
|
45
|
+
newick_tree.tree.edges.length.should eq(5)
|
46
|
+
newick_tree.tree.nodes.length.should eq(6)
|
47
|
+
}
|
48
|
+
trees.should eq(3)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'model consistency' do
|
52
|
+
model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
|
53
|
+
model.should be_an_instance_of BioInterchange::Phylogenetics::TreeSet
|
54
|
+
model.contents.length.should eq(1)
|
55
|
+
model.contents.first.edges.length.should eq(6)
|
56
|
+
model.contents.first.nodes.length.should eq(7)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe BioInterchange::Phylogenetics::CDAORDFWriter do
|
62
|
+
describe 'serialization of tree models' do
|
63
|
+
it 'empty document' do
|
64
|
+
istream, ostream = IO.pipe
|
65
|
+
BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(BioInterchange::Phylogenetics::TreeSet.new())
|
66
|
+
ostream.close
|
67
|
+
istream.read.lines.count.should eq(0)
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'single Newick tree' do
|
71
|
+
istream, ostream = IO.pipe
|
72
|
+
model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
|
73
|
+
BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(model)
|
74
|
+
ostream.close
|
75
|
+
istream.read.lines.count.should eq(151)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|