biointerchange 0.2.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/README.md +269 -19
- data/VERSION +1 -1
- data/examples/bininda_emonds_mammals.new +1 -0
- data/examples/rdfization.rb +17 -0
- data/examples/tree1.new +1 -0
- data/examples/tree2.new +1 -0
- data/examples/vocabulary.rb +26 -5
- data/generators/javaify.rb +12 -18
- data/generators/make_supplement_releases.rb +2 -0
- data/generators/pythonify.rb +21 -8
- data/generators/rdfxml.rb +15 -1
- data/lib/biointerchange/cdao.rb +2014 -0
- data/lib/biointerchange/core.rb +70 -77
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
- data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
- data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
- data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
- data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
- data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
- data/lib/biointerchange/registry.rb +50 -8
- data/lib/biointerchange/so.rb +150 -0
- data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
- data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
- data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
- data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
- data/spec/phylogenetics_spec.rb +79 -0
- data/supplemental/java/biointerchange/pom.xml +1 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
- data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
- data/supplemental/python/biointerchange/cdao.py +2021 -0
- data/supplemental/python/biointerchange/faldo.py +37 -38
- data/supplemental/python/biointerchange/gff3o.py +156 -157
- data/supplemental/python/biointerchange/goxref.py +172 -172
- data/supplemental/python/biointerchange/gvf1o.py +428 -429
- data/supplemental/python/biointerchange/sio.py +3133 -3134
- data/supplemental/python/biointerchange/so.py +6626 -6527
- data/supplemental/python/biointerchange/sofa.py +790 -791
- data/supplemental/python/example.py +23 -5
- data/supplemental/python/setup.py +2 -2
- data/web/about.html +1 -0
- data/web/api.html +223 -15
- data/web/biointerchange.js +27 -6
- data/web/cli.html +8 -3
- data/web/index.html +6 -2
- data/web/ontologies.html +3 -0
- data/web/service/rdfizer.fcgi +7 -15
- data/web/webservices.html +6 -2
- metadata +30 -3
data/lib/biointerchange/so.rb
CHANGED
@@ -13114,6 +13114,156 @@ class SO
|
|
13114
13114
|
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/SEQUENCE_variant_of') then
|
13115
13115
|
return true
|
13116
13116
|
end
|
13117
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/adjacent_to') then
|
13118
|
+
return true
|
13119
|
+
end
|
13120
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/associated_with') then
|
13121
|
+
return true
|
13122
|
+
end
|
13123
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/complete_evidence_for_feature') then
|
13124
|
+
return true
|
13125
|
+
end
|
13126
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/connects_on') then
|
13127
|
+
return true
|
13128
|
+
end
|
13129
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contained_by') then
|
13130
|
+
return true
|
13131
|
+
end
|
13132
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contains') then
|
13133
|
+
return true
|
13134
|
+
end
|
13135
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/derives_from') then
|
13136
|
+
return true
|
13137
|
+
end
|
13138
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/disconnected_from') then
|
13139
|
+
return true
|
13140
|
+
end
|
13141
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_from') then
|
13142
|
+
return true
|
13143
|
+
end
|
13144
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_to') then
|
13145
|
+
return true
|
13146
|
+
end
|
13147
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/evidence_for_feature') then
|
13148
|
+
return true
|
13149
|
+
end
|
13150
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/exemplar_of') then
|
13151
|
+
return true
|
13152
|
+
end
|
13153
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finished_by') then
|
13154
|
+
return true
|
13155
|
+
end
|
13156
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finishes') then
|
13157
|
+
return true
|
13158
|
+
end
|
13159
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/gained') then
|
13160
|
+
return true
|
13161
|
+
end
|
13162
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/genome_of') then
|
13163
|
+
return true
|
13164
|
+
end
|
13165
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guided_by') then
|
13166
|
+
return true
|
13167
|
+
end
|
13168
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guides') then
|
13169
|
+
return true
|
13170
|
+
end
|
13171
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_integral_part') then
|
13172
|
+
return true
|
13173
|
+
end
|
13174
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_origin') then
|
13175
|
+
return true
|
13176
|
+
end
|
13177
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_part') then
|
13178
|
+
return true
|
13179
|
+
end
|
13180
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_quality') then
|
13181
|
+
return true
|
13182
|
+
end
|
13183
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/homologous_to') then
|
13184
|
+
return true
|
13185
|
+
end
|
13186
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/integral_part_of') then
|
13187
|
+
return true
|
13188
|
+
end
|
13189
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/is_consecutive_sequence_of') then
|
13190
|
+
return true
|
13191
|
+
end
|
13192
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/lost') then
|
13193
|
+
return true
|
13194
|
+
end
|
13195
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/maximally_overlaps') then
|
13196
|
+
return true
|
13197
|
+
end
|
13198
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/member_of') then
|
13199
|
+
return true
|
13200
|
+
end
|
13201
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/non_functional_homolog_of') then
|
13202
|
+
return true
|
13203
|
+
end
|
13204
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/orthologous_to') then
|
13205
|
+
return true
|
13206
|
+
end
|
13207
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/overlaps') then
|
13208
|
+
return true
|
13209
|
+
end
|
13210
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/paralogous_to') then
|
13211
|
+
return true
|
13212
|
+
end
|
13213
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/part_of') then
|
13214
|
+
return true
|
13215
|
+
end
|
13216
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/partial_evidence_for_feature') then
|
13217
|
+
return true
|
13218
|
+
end
|
13219
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/position_of') then
|
13220
|
+
return true
|
13221
|
+
end
|
13222
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_from') then
|
13223
|
+
return true
|
13224
|
+
end
|
13225
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_into') then
|
13226
|
+
return true
|
13227
|
+
end
|
13228
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_from') then
|
13229
|
+
return true
|
13230
|
+
end
|
13231
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_to') then
|
13232
|
+
return true
|
13233
|
+
end
|
13234
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/sequence_of') then
|
13235
|
+
return true
|
13236
|
+
end
|
13237
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/similar_to') then
|
13238
|
+
return true
|
13239
|
+
end
|
13240
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/started_by') then
|
13241
|
+
return true
|
13242
|
+
end
|
13243
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/starts') then
|
13244
|
+
return true
|
13245
|
+
end
|
13246
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_from') then
|
13247
|
+
return true
|
13248
|
+
end
|
13249
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_to') then
|
13250
|
+
return true
|
13251
|
+
end
|
13252
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_from') then
|
13253
|
+
return true
|
13254
|
+
end
|
13255
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_to') then
|
13256
|
+
return true
|
13257
|
+
end
|
13258
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translates_to') then
|
13259
|
+
return true
|
13260
|
+
end
|
13261
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translation_of') then
|
13262
|
+
return true
|
13263
|
+
end
|
13264
|
+
if uri == RDF::URI.new('http://purl.obolibrary.org/obo/variant_of') then
|
13265
|
+
return true
|
13266
|
+
end
|
13117
13267
|
return false
|
13118
13268
|
end
|
13119
13269
|
|
@@ -5,6 +5,27 @@ require 'rexml/streamlistener'
|
|
5
5
|
|
6
6
|
class PDFxXMLReader < BioInterchange::TextMining::TMReader
|
7
7
|
|
8
|
+
# Register reader:
|
9
|
+
BioInterchange::Registry.register_reader(
|
10
|
+
'uk.ac.man.pdfx',
|
11
|
+
PDFxXMLReader,
|
12
|
+
[
|
13
|
+
'name',
|
14
|
+
'name_id',
|
15
|
+
'date',
|
16
|
+
[ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
|
17
|
+
'version'
|
18
|
+
],
|
19
|
+
false,
|
20
|
+
'PDFx XML reader',
|
21
|
+
[
|
22
|
+
[ 'date <date>', 'date when the GFF3 file was created (optional)' ],
|
23
|
+
[ 'version <version>', 'version number of resource (optional)' ],
|
24
|
+
[ 'name <name>', 'name of the GFF3 file creator (required)' ],
|
25
|
+
[ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
|
26
|
+
]
|
27
|
+
)
|
28
|
+
|
8
29
|
# Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
|
9
30
|
#
|
10
31
|
# Presently I assume a single document per xml file,
|
@@ -14,8 +35,6 @@ class PDFxXMLReader < BioInterchange::TextMining::TMReader
|
|
14
35
|
#
|
15
36
|
# +inputstream+:: Input IO stream to deserialize
|
16
37
|
def deserialize(inputstream)
|
17
|
-
#super(inputstream)
|
18
|
-
|
19
38
|
raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.' unless inputstream.kind_of?(IO) or inputstream.kind_of?(String)
|
20
39
|
|
21
40
|
@input = inputstream
|
@@ -5,13 +5,36 @@ require 'json'
|
|
5
5
|
|
6
6
|
class PubAnnosJSONReader < BioInterchange::TextMining::TMReader
|
7
7
|
|
8
|
+
# Register reader:
|
9
|
+
BioInterchange::Registry.register_reader(
|
10
|
+
'dbcls.catanns.json',
|
11
|
+
PubAnnosJSONReader,
|
12
|
+
[
|
13
|
+
'name',
|
14
|
+
'name_id',
|
15
|
+
'date',
|
16
|
+
[ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
|
17
|
+
'version'
|
18
|
+
],
|
19
|
+
false,
|
20
|
+
'PDFx XML reader',
|
21
|
+
[
|
22
|
+
[ 'date <date>', 'date when the GFF3 file was created (optional)' ],
|
23
|
+
[ 'version <version>', 'version number of resource (optional)' ],
|
24
|
+
[ 'name <name>', 'name of the GFF3 file creator (required)' ],
|
25
|
+
[ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
|
26
|
+
]
|
27
|
+
)
|
28
|
+
|
29
|
+
# Deserialize a PubAnnotations JSON object.
|
30
|
+
#
|
31
|
+
# +inputstream+:: Input IO stream to deserialize
|
8
32
|
def deserialize(inputstream)
|
9
33
|
if inputstream.kind_of?(IO) then
|
10
34
|
pubannos(inputstream.read)
|
11
35
|
elsif inputstream.kind_of?(String) then
|
12
36
|
pubannos(inputstream)
|
13
37
|
else
|
14
|
-
#else raise exception
|
15
38
|
super(inputstream)
|
16
39
|
end
|
17
40
|
end
|
@@ -6,6 +6,15 @@ module BioInterchange::TextMining
|
|
6
6
|
|
7
7
|
class RDFWriter < BioInterchange::Writer
|
8
8
|
|
9
|
+
# Register writer:
|
10
|
+
BioInterchange::Registry.register_writer(
|
11
|
+
'rdf.bh12.sio',
|
12
|
+
RDFWriter,
|
13
|
+
[ 'dbcls.catanns.json', 'uk.ac.man.pdfx' ],
|
14
|
+
false,
|
15
|
+
'Semanticscience Integrated Ontology (SIO) based text-mining RDFization'
|
16
|
+
)
|
17
|
+
|
9
18
|
# Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
|
10
19
|
#
|
11
20
|
# +ostream+:: instance of an IO class or derivative that is used for RDF serialization
|
@@ -5,16 +5,16 @@ class TMReader < BioInterchange::Reader
|
|
5
5
|
# Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
|
6
6
|
#
|
7
7
|
# +name+:: Name of the process which generated this data
|
8
|
-
# +
|
8
|
+
# +name_id+:: URI of the resource that generated this data
|
9
9
|
# +date+:: Optional date of data creation
|
10
10
|
# +processtype+:: Type of process that created this content
|
11
11
|
# +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
|
12
|
-
def initialize(name,
|
13
|
-
|
12
|
+
def initialize(name, name_id, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
|
13
|
+
raise ArgumentError, 'Require "name" and "name_id" options to specify source of annotations (e.g., a manual annotators name, or software tool name) and their associated URI (e.g., email address, or webaddress).' unless name and name_id
|
14
|
+
|
14
15
|
metadata = {}
|
15
16
|
metadata[BioInterchange::TextMining::Process::VERSION] = version
|
16
|
-
@process = BioInterchange::TextMining::Process.new(name,
|
17
|
-
|
17
|
+
@process = BioInterchange::TextMining::Process.new(name, name_id, processtype, metadata, date)
|
18
18
|
end
|
19
19
|
|
20
20
|
|
@@ -0,0 +1,79 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rspec'
|
4
|
+
require 'bio'
|
5
|
+
|
6
|
+
# Turn off verbose reporting here, since class definitions may be loaded multiple
|
7
|
+
# times here. That reports that constants have already been initialized, which
|
8
|
+
# is true, but they are only "re-initialized" with the very same values.
|
9
|
+
v, $VERBOSE = $VERBOSE, nil
|
10
|
+
load 'lib/biointerchange/core.rb'
|
11
|
+
load 'lib/biointerchange/cdao.rb'
|
12
|
+
load 'lib/biointerchange/reader.rb'
|
13
|
+
load 'lib/biointerchange/model.rb'
|
14
|
+
load 'lib/biointerchange/writer.rb'
|
15
|
+
load 'lib/biointerchange/phylogenetics/newick_reader.rb'
|
16
|
+
load 'lib/biointerchange/phylogenetics/tree_set.rb'
|
17
|
+
load 'lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
|
18
|
+
$VERBOSE = v
|
19
|
+
|
20
|
+
describe BioInterchange::Phylogenetics::NewickReader do
|
21
|
+
describe 'deserialization of Newick trees' do
|
22
|
+
it 'empty document' do
|
23
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new(''))
|
24
|
+
trees = 0
|
25
|
+
tree_file.each_entry { |tree| trees += 1 }
|
26
|
+
trees.should eq(0)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'single Newick tree' do
|
30
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new('(,,(,));'))
|
31
|
+
trees = 0
|
32
|
+
tree_file.each_entry { |newick_tree|
|
33
|
+
trees += 1
|
34
|
+
newick_tree.tree.edges.length.should eq(5)
|
35
|
+
newick_tree.tree.nodes.length.should eq(6)
|
36
|
+
}
|
37
|
+
trees.should eq(1)
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'three Newick trees' do
|
41
|
+
tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new("(,,(,));\n(A,B,(C,D));\n(A,B,(C,D)E)F;"))
|
42
|
+
trees = 0
|
43
|
+
tree_file.each_entry { |newick_tree|
|
44
|
+
trees += 1
|
45
|
+
newick_tree.tree.edges.length.should eq(5)
|
46
|
+
newick_tree.tree.nodes.length.should eq(6)
|
47
|
+
}
|
48
|
+
trees.should eq(3)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'model consistency' do
|
52
|
+
model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
|
53
|
+
model.should be_an_instance_of BioInterchange::Phylogenetics::TreeSet
|
54
|
+
model.contents.length.should eq(1)
|
55
|
+
model.contents.first.edges.length.should eq(6)
|
56
|
+
model.contents.first.nodes.length.should eq(7)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe BioInterchange::Phylogenetics::CDAORDFWriter do
|
62
|
+
describe 'serialization of tree models' do
|
63
|
+
it 'empty document' do
|
64
|
+
istream, ostream = IO.pipe
|
65
|
+
BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(BioInterchange::Phylogenetics::TreeSet.new())
|
66
|
+
ostream.close
|
67
|
+
istream.read.lines.count.should eq(0)
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'single Newick tree' do
|
71
|
+
istream, ostream = IO.pipe
|
72
|
+
model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
|
73
|
+
BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(model)
|
74
|
+
ostream.close
|
75
|
+
istream.read.lines.count.should eq(151)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|