biointerchange 0.2.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +269 -19
  3. data/VERSION +1 -1
  4. data/examples/bininda_emonds_mammals.new +1 -0
  5. data/examples/rdfization.rb +17 -0
  6. data/examples/tree1.new +1 -0
  7. data/examples/tree2.new +1 -0
  8. data/examples/vocabulary.rb +26 -5
  9. data/generators/javaify.rb +12 -18
  10. data/generators/make_supplement_releases.rb +2 -0
  11. data/generators/pythonify.rb +21 -8
  12. data/generators/rdfxml.rb +15 -1
  13. data/lib/biointerchange/cdao.rb +2014 -0
  14. data/lib/biointerchange/core.rb +70 -77
  15. data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
  16. data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
  17. data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
  18. data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
  19. data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
  20. data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
  21. data/lib/biointerchange/registry.rb +50 -8
  22. data/lib/biointerchange/so.rb +150 -0
  23. data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
  24. data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
  25. data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
  26. data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
  27. data/spec/phylogenetics_spec.rb +79 -0
  28. data/supplemental/java/biointerchange/pom.xml +1 -1
  29. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
  30. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
  31. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
  32. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
  33. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
  34. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
  35. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
  36. data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
  37. data/supplemental/python/biointerchange/cdao.py +2021 -0
  38. data/supplemental/python/biointerchange/faldo.py +37 -38
  39. data/supplemental/python/biointerchange/gff3o.py +156 -157
  40. data/supplemental/python/biointerchange/goxref.py +172 -172
  41. data/supplemental/python/biointerchange/gvf1o.py +428 -429
  42. data/supplemental/python/biointerchange/sio.py +3133 -3134
  43. data/supplemental/python/biointerchange/so.py +6626 -6527
  44. data/supplemental/python/biointerchange/sofa.py +790 -791
  45. data/supplemental/python/example.py +23 -5
  46. data/supplemental/python/setup.py +2 -2
  47. data/web/about.html +1 -0
  48. data/web/api.html +223 -15
  49. data/web/biointerchange.js +27 -6
  50. data/web/cli.html +8 -3
  51. data/web/index.html +6 -2
  52. data/web/ontologies.html +3 -0
  53. data/web/service/rdfizer.fcgi +7 -15
  54. data/web/webservices.html +6 -2
  55. metadata +30 -3
@@ -13114,6 +13114,156 @@ class SO
13114
13114
  if uri == RDF::URI.new('http://purl.obolibrary.org/obo/SEQUENCE_variant_of') then
13115
13115
  return true
13116
13116
  end
13117
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/adjacent_to') then
13118
+ return true
13119
+ end
13120
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/associated_with') then
13121
+ return true
13122
+ end
13123
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/complete_evidence_for_feature') then
13124
+ return true
13125
+ end
13126
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/connects_on') then
13127
+ return true
13128
+ end
13129
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contained_by') then
13130
+ return true
13131
+ end
13132
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contains') then
13133
+ return true
13134
+ end
13135
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/derives_from') then
13136
+ return true
13137
+ end
13138
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/disconnected_from') then
13139
+ return true
13140
+ end
13141
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_from') then
13142
+ return true
13143
+ end
13144
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_to') then
13145
+ return true
13146
+ end
13147
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/evidence_for_feature') then
13148
+ return true
13149
+ end
13150
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/exemplar_of') then
13151
+ return true
13152
+ end
13153
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finished_by') then
13154
+ return true
13155
+ end
13156
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finishes') then
13157
+ return true
13158
+ end
13159
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/gained') then
13160
+ return true
13161
+ end
13162
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/genome_of') then
13163
+ return true
13164
+ end
13165
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guided_by') then
13166
+ return true
13167
+ end
13168
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guides') then
13169
+ return true
13170
+ end
13171
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_integral_part') then
13172
+ return true
13173
+ end
13174
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_origin') then
13175
+ return true
13176
+ end
13177
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_part') then
13178
+ return true
13179
+ end
13180
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_quality') then
13181
+ return true
13182
+ end
13183
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/homologous_to') then
13184
+ return true
13185
+ end
13186
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/integral_part_of') then
13187
+ return true
13188
+ end
13189
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/is_consecutive_sequence_of') then
13190
+ return true
13191
+ end
13192
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/lost') then
13193
+ return true
13194
+ end
13195
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/maximally_overlaps') then
13196
+ return true
13197
+ end
13198
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/member_of') then
13199
+ return true
13200
+ end
13201
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/non_functional_homolog_of') then
13202
+ return true
13203
+ end
13204
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/orthologous_to') then
13205
+ return true
13206
+ end
13207
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/overlaps') then
13208
+ return true
13209
+ end
13210
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/paralogous_to') then
13211
+ return true
13212
+ end
13213
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/part_of') then
13214
+ return true
13215
+ end
13216
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/partial_evidence_for_feature') then
13217
+ return true
13218
+ end
13219
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/position_of') then
13220
+ return true
13221
+ end
13222
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_from') then
13223
+ return true
13224
+ end
13225
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_into') then
13226
+ return true
13227
+ end
13228
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_from') then
13229
+ return true
13230
+ end
13231
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_to') then
13232
+ return true
13233
+ end
13234
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/sequence_of') then
13235
+ return true
13236
+ end
13237
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/similar_to') then
13238
+ return true
13239
+ end
13240
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/started_by') then
13241
+ return true
13242
+ end
13243
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/starts') then
13244
+ return true
13245
+ end
13246
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_from') then
13247
+ return true
13248
+ end
13249
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_to') then
13250
+ return true
13251
+ end
13252
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_from') then
13253
+ return true
13254
+ end
13255
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_to') then
13256
+ return true
13257
+ end
13258
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translates_to') then
13259
+ return true
13260
+ end
13261
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translation_of') then
13262
+ return true
13263
+ end
13264
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/variant_of') then
13265
+ return true
13266
+ end
13117
13267
  return false
13118
13268
  end
13119
13269
 
@@ -5,6 +5,27 @@ require 'rexml/streamlistener'
5
5
 
6
6
  class PDFxXMLReader < BioInterchange::TextMining::TMReader
7
7
 
8
+ # Register reader:
9
+ BioInterchange::Registry.register_reader(
10
+ 'uk.ac.man.pdfx',
11
+ PDFxXMLReader,
12
+ [
13
+ 'name',
14
+ 'name_id',
15
+ 'date',
16
+ [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
17
+ 'version'
18
+ ],
19
+ false,
20
+ 'PDFx XML reader',
21
+ [
22
+ [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
23
+ [ 'version <version>', 'version number of resource (optional)' ],
24
+ [ 'name <name>', 'name of the GFF3 file creator (required)' ],
25
+ [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
26
+ ]
27
+ )
28
+
8
29
  # Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
9
30
  #
10
31
  # Presently I assume a single document per xml file,
@@ -14,8 +35,6 @@ class PDFxXMLReader < BioInterchange::TextMining::TMReader
14
35
  #
15
36
  # +inputstream+:: Input IO stream to deserialize
16
37
  def deserialize(inputstream)
17
- #super(inputstream)
18
-
19
38
  raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.' unless inputstream.kind_of?(IO) or inputstream.kind_of?(String)
20
39
 
21
40
  @input = inputstream
@@ -5,13 +5,36 @@ require 'json'
5
5
 
6
6
  class PubAnnosJSONReader < BioInterchange::TextMining::TMReader
7
7
 
8
+ # Register reader:
9
+ BioInterchange::Registry.register_reader(
10
+ 'dbcls.catanns.json',
11
+ PubAnnosJSONReader,
12
+ [
13
+ 'name',
14
+ 'name_id',
15
+ 'date',
16
+ [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
17
+ 'version'
18
+ ],
19
+ false,
20
+ 'PDFx XML reader',
21
+ [
22
+ [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
23
+ [ 'version <version>', 'version number of resource (optional)' ],
24
+ [ 'name <name>', 'name of the GFF3 file creator (required)' ],
25
+ [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
26
+ ]
27
+ )
28
+
29
+ # Deserialize a PubAnnotations JSON object.
30
+ #
31
+ # +inputstream+:: Input IO stream to deserialize
8
32
  def deserialize(inputstream)
9
33
  if inputstream.kind_of?(IO) then
10
34
  pubannos(inputstream.read)
11
35
  elsif inputstream.kind_of?(String) then
12
36
  pubannos(inputstream)
13
37
  else
14
- #else raise exception
15
38
  super(inputstream)
16
39
  end
17
40
  end
@@ -6,6 +6,15 @@ module BioInterchange::TextMining
6
6
 
7
7
  class RDFWriter < BioInterchange::Writer
8
8
 
9
+ # Register writers:
10
+ BioInterchange::Registry.register_writer(
11
+ 'rdf.bh12.sio',
12
+ RDFWriter,
13
+ [ 'dbcls.catanns.json', 'uk.ac.man.pdfx' ],
14
+ false,
15
+ 'Semanticscience Integrated Ontology (SIO) based text-mining RDFization'
16
+ )
17
+
9
18
  # Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
10
19
  #
11
20
  # +ostream+:: instance of an IO class or derivative that is used for RDF serialization
@@ -5,16 +5,16 @@ class TMReader < BioInterchange::Reader
5
5
  # Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
6
6
  #
7
7
  # +name+:: Name of the process which generated this data
8
- # +name_uri+:: URI of the resource that generated this data
8
+ # +name_id+:: URI of the resource that generated this data
9
9
  # +date+:: Optional date of data creation
10
10
  # +processtype+:: Type of process that created this content
11
11
  # +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
12
- def initialize(name, name_uri, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
13
-
12
+ def initialize(name, name_id, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
13
+ raise ArgumentError, 'Require "name" and "name_id" options to specify source of annotations (e.g., a manual annotators name, or software tool name) and their associated URI (e.g., email address, or webaddress).' unless name and name_id
14
+
14
15
  metadata = {}
15
16
  metadata[BioInterchange::TextMining::Process::VERSION] = version
16
- @process = BioInterchange::TextMining::Process.new(name, name_uri, processtype, metadata, date)
17
-
17
+ @process = BioInterchange::TextMining::Process.new(name, name_id, processtype, metadata, date)
18
18
  end
19
19
 
20
20
 
@@ -0,0 +1,79 @@
1
+
2
+ require 'rubygems'
3
+ require 'rspec'
4
+ require 'bio'
5
+
6
+ # Turn off verbose reporting here, since class definitions may be loaded multiple
7
+ # times here. That reports that constants have already been initialized, which
8
+ # is true, but they are only "re-initialized" with the very same values.
9
+ v, $VERBOSE = $VERBOSE, nil
10
+ load 'lib/biointerchange/core.rb'
11
+ load 'lib/biointerchange/cdao.rb'
12
+ load 'lib/biointerchange/reader.rb'
13
+ load 'lib/biointerchange/model.rb'
14
+ load 'lib/biointerchange/writer.rb'
15
+ load 'lib/biointerchange/phylogenetics/newick_reader.rb'
16
+ load 'lib/biointerchange/phylogenetics/tree_set.rb'
17
+ load 'lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
18
+ $VERBOSE = v
19
+
20
+ describe BioInterchange::Phylogenetics::NewickReader do
21
+ describe 'deserialization of Newick trees' do
22
+ it 'empty document' do
23
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new(''))
24
+ trees = 0
25
+ tree_file.each_entry { |tree| trees += 1 }
26
+ trees.should eq(0)
27
+ end
28
+
29
+ it 'single Newick tree' do
30
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new('(,,(,));'))
31
+ trees = 0
32
+ tree_file.each_entry { |newick_tree|
33
+ trees += 1
34
+ newick_tree.tree.edges.length.should eq(5)
35
+ newick_tree.tree.nodes.length.should eq(6)
36
+ }
37
+ trees.should eq(1)
38
+ end
39
+
40
+ it 'three Newick trees' do
41
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new("(,,(,));\n(A,B,(C,D));\n(A,B,(C,D)E)F;"))
42
+ trees = 0
43
+ tree_file.each_entry { |newick_tree|
44
+ trees += 1
45
+ newick_tree.tree.edges.length.should eq(5)
46
+ newick_tree.tree.nodes.length.should eq(6)
47
+ }
48
+ trees.should eq(3)
49
+ end
50
+
51
+ it 'model consistency' do
52
+ model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
53
+ model.should be_an_instance_of BioInterchange::Phylogenetics::TreeSet
54
+ model.contents.length.should eq(1)
55
+ model.contents.first.edges.length.should eq(6)
56
+ model.contents.first.nodes.length.should eq(7)
57
+ end
58
+ end
59
+ end
60
+
61
+ describe BioInterchange::Phylogenetics::CDAORDFWriter do
62
+ describe 'serialization of tree models' do
63
+ it 'empty document' do
64
+ istream, ostream = IO.pipe
65
+ BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(BioInterchange::Phylogenetics::TreeSet.new())
66
+ ostream.close
67
+ istream.read.lines.count.should eq(0)
68
+ end
69
+
70
+ it 'single Newick tree' do
71
+ istream, ostream = IO.pipe
72
+ model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
73
+ BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(model)
74
+ ostream.close
75
+ istream.read.lines.count.should eq(151)
76
+ end
77
+ end
78
+ end
79
+
@@ -4,7 +4,7 @@
4
4
 
5
5
  <groupId>org.biointerchange</groupId>
6
6
  <artifactId>vocabularies</artifactId>
7
- <version>0.2.2</version>
7
+ <version>1.0.0</version>
8
8
  <packaging>jar</packaging>
9
9
 
10
10
  <name>BioInterchange Vocabularies</name>