biointerchange 0.2.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +269 -19
  3. data/VERSION +1 -1
  4. data/examples/bininda_emonds_mammals.new +1 -0
  5. data/examples/rdfization.rb +17 -0
  6. data/examples/tree1.new +1 -0
  7. data/examples/tree2.new +1 -0
  8. data/examples/vocabulary.rb +26 -5
  9. data/generators/javaify.rb +12 -18
  10. data/generators/make_supplement_releases.rb +2 -0
  11. data/generators/pythonify.rb +21 -8
  12. data/generators/rdfxml.rb +15 -1
  13. data/lib/biointerchange/cdao.rb +2014 -0
  14. data/lib/biointerchange/core.rb +70 -77
  15. data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
  16. data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
  17. data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
  18. data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
  19. data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
  20. data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
  21. data/lib/biointerchange/registry.rb +50 -8
  22. data/lib/biointerchange/so.rb +150 -0
  23. data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
  24. data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
  25. data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
  26. data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
  27. data/spec/phylogenetics_spec.rb +79 -0
  28. data/supplemental/java/biointerchange/pom.xml +1 -1
  29. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
  30. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
  31. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
  32. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
  33. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
  34. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
  35. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
  36. data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
  37. data/supplemental/python/biointerchange/cdao.py +2021 -0
  38. data/supplemental/python/biointerchange/faldo.py +37 -38
  39. data/supplemental/python/biointerchange/gff3o.py +156 -157
  40. data/supplemental/python/biointerchange/goxref.py +172 -172
  41. data/supplemental/python/biointerchange/gvf1o.py +428 -429
  42. data/supplemental/python/biointerchange/sio.py +3133 -3134
  43. data/supplemental/python/biointerchange/so.py +6626 -6527
  44. data/supplemental/python/biointerchange/sofa.py +790 -791
  45. data/supplemental/python/example.py +23 -5
  46. data/supplemental/python/setup.py +2 -2
  47. data/web/about.html +1 -0
  48. data/web/api.html +223 -15
  49. data/web/biointerchange.js +27 -6
  50. data/web/cli.html +8 -3
  51. data/web/index.html +6 -2
  52. data/web/ontologies.html +3 -0
  53. data/web/service/rdfizer.fcgi +7 -15
  54. data/web/webservices.html +6 -2
  55. metadata +30 -3
@@ -13114,6 +13114,156 @@ class SO
13114
13114
  if uri == RDF::URI.new('http://purl.obolibrary.org/obo/SEQUENCE_variant_of') then
13115
13115
  return true
13116
13116
  end
13117
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/adjacent_to') then
13118
+ return true
13119
+ end
13120
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/associated_with') then
13121
+ return true
13122
+ end
13123
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/complete_evidence_for_feature') then
13124
+ return true
13125
+ end
13126
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/connects_on') then
13127
+ return true
13128
+ end
13129
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contained_by') then
13130
+ return true
13131
+ end
13132
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contains') then
13133
+ return true
13134
+ end
13135
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/derives_from') then
13136
+ return true
13137
+ end
13138
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/disconnected_from') then
13139
+ return true
13140
+ end
13141
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_from') then
13142
+ return true
13143
+ end
13144
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_to') then
13145
+ return true
13146
+ end
13147
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/evidence_for_feature') then
13148
+ return true
13149
+ end
13150
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/exemplar_of') then
13151
+ return true
13152
+ end
13153
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finished_by') then
13154
+ return true
13155
+ end
13156
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finishes') then
13157
+ return true
13158
+ end
13159
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/gained') then
13160
+ return true
13161
+ end
13162
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/genome_of') then
13163
+ return true
13164
+ end
13165
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guided_by') then
13166
+ return true
13167
+ end
13168
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guides') then
13169
+ return true
13170
+ end
13171
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_integral_part') then
13172
+ return true
13173
+ end
13174
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_origin') then
13175
+ return true
13176
+ end
13177
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_part') then
13178
+ return true
13179
+ end
13180
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_quality') then
13181
+ return true
13182
+ end
13183
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/homologous_to') then
13184
+ return true
13185
+ end
13186
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/integral_part_of') then
13187
+ return true
13188
+ end
13189
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/is_consecutive_sequence_of') then
13190
+ return true
13191
+ end
13192
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/lost') then
13193
+ return true
13194
+ end
13195
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/maximally_overlaps') then
13196
+ return true
13197
+ end
13198
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/member_of') then
13199
+ return true
13200
+ end
13201
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/non_functional_homolog_of') then
13202
+ return true
13203
+ end
13204
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/orthologous_to') then
13205
+ return true
13206
+ end
13207
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/overlaps') then
13208
+ return true
13209
+ end
13210
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/paralogous_to') then
13211
+ return true
13212
+ end
13213
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/part_of') then
13214
+ return true
13215
+ end
13216
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/partial_evidence_for_feature') then
13217
+ return true
13218
+ end
13219
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/position_of') then
13220
+ return true
13221
+ end
13222
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_from') then
13223
+ return true
13224
+ end
13225
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_into') then
13226
+ return true
13227
+ end
13228
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_from') then
13229
+ return true
13230
+ end
13231
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_to') then
13232
+ return true
13233
+ end
13234
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/sequence_of') then
13235
+ return true
13236
+ end
13237
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/similar_to') then
13238
+ return true
13239
+ end
13240
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/started_by') then
13241
+ return true
13242
+ end
13243
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/starts') then
13244
+ return true
13245
+ end
13246
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_from') then
13247
+ return true
13248
+ end
13249
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_to') then
13250
+ return true
13251
+ end
13252
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_from') then
13253
+ return true
13254
+ end
13255
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_to') then
13256
+ return true
13257
+ end
13258
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translates_to') then
13259
+ return true
13260
+ end
13261
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translation_of') then
13262
+ return true
13263
+ end
13264
+ if uri == RDF::URI.new('http://purl.obolibrary.org/obo/variant_of') then
13265
+ return true
13266
+ end
13117
13267
  return false
13118
13268
  end
13119
13269
 
@@ -5,6 +5,27 @@ require 'rexml/streamlistener'
5
5
 
6
6
  class PDFxXMLReader < BioInterchange::TextMining::TMReader
7
7
 
8
+ # Register reader:
9
+ BioInterchange::Registry.register_reader(
10
+ 'uk.ac.man.pdfx',
11
+ PDFxXMLReader,
12
+ [
13
+ 'name',
14
+ 'name_id',
15
+ 'date',
16
+ [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
17
+ 'version'
18
+ ],
19
+ false,
20
+ 'PDFx XML reader',
21
+ [
22
+ [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
23
+ [ 'version <version>', 'version number of resource (optional)' ],
24
+ [ 'name <name>', 'name of the GFF3 file creator (required)' ],
25
+ [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
26
+ ]
27
+ )
28
+
8
29
  # Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
9
30
  #
10
31
  # Presently I assume a single document per xml file,
@@ -14,8 +35,6 @@ class PDFxXMLReader < BioInterchange::TextMining::TMReader
14
35
  #
15
36
  # +inputstream+:: Input IO stream to deserialize
16
37
  def deserialize(inputstream)
17
- #super(inputstream)
18
-
19
38
  raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.' unless inputstream.kind_of?(IO) or inputstream.kind_of?(String)
20
39
 
21
40
  @input = inputstream
@@ -5,13 +5,36 @@ require 'json'
5
5
 
6
6
  class PubAnnosJSONReader < BioInterchange::TextMining::TMReader
7
7
 
8
+ # Register reader:
9
+ BioInterchange::Registry.register_reader(
10
+ 'dbcls.catanns.json',
11
+ PubAnnosJSONReader,
12
+ [
13
+ 'name',
14
+ 'name_id',
15
+ 'date',
16
+ [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
17
+ 'version'
18
+ ],
19
+ false,
20
+ 'PDFx XML reader',
21
+ [
22
+ [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
23
+ [ 'version <version>', 'version number of resource (optional)' ],
24
+ [ 'name <name>', 'name of the GFF3 file creator (required)' ],
25
+ [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
26
+ ]
27
+ )
28
+
29
+ # Deserialize a PubAnnotations JSON object.
30
+ #
31
+ # +inputstream+:: Input IO stream to deserialize
8
32
  def deserialize(inputstream)
9
33
  if inputstream.kind_of?(IO) then
10
34
  pubannos(inputstream.read)
11
35
  elsif inputstream.kind_of?(String) then
12
36
  pubannos(inputstream)
13
37
  else
14
- #else raise exception
15
38
  super(inputstream)
16
39
  end
17
40
  end
@@ -6,6 +6,15 @@ module BioInterchange::TextMining
6
6
 
7
7
  class RDFWriter < BioInterchange::Writer
8
8
 
9
+ # Register writers:
10
+ BioInterchange::Registry.register_writer(
11
+ 'rdf.bh12.sio',
12
+ RDFWriter,
13
+ [ 'dbcls.catanns.json', 'uk.ac.man.pdfx' ],
14
+ false,
15
+ 'Semanticscience Integrated Ontology (SIO) based text-mining RDFization'
16
+ )
17
+
9
18
  # Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
10
19
  #
11
20
  # +ostream+:: instance of an IO class or derivative that is used for RDF serialization
@@ -5,16 +5,16 @@ class TMReader < BioInterchange::Reader
5
5
  # Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
6
6
  #
7
7
  # +name+:: Name of the process which generated this data
8
- # +name_uri+:: URI of the resource that generated this data
8
+ # +name_id+:: URI of the resource that generated this data
9
9
  # +date+:: Optional date of data creation
10
10
  # +processtype+:: Type of process that created this content
11
11
  # +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
12
- def initialize(name, name_uri, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
13
-
12
+ def initialize(name, name_id, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
13
+ raise ArgumentError, 'Require "name" and "name_id" options to specify source of annotations (e.g., a manual annotators name, or software tool name) and their associated URI (e.g., email address, or webaddress).' unless name and name_id
14
+
14
15
  metadata = {}
15
16
  metadata[BioInterchange::TextMining::Process::VERSION] = version
16
- @process = BioInterchange::TextMining::Process.new(name, name_uri, processtype, metadata, date)
17
-
17
+ @process = BioInterchange::TextMining::Process.new(name, name_id, processtype, metadata, date)
18
18
  end
19
19
 
20
20
 
@@ -0,0 +1,79 @@
1
+
2
+ require 'rubygems'
3
+ require 'rspec'
4
+ require 'bio'
5
+
6
+ # Turn off verbose reporting here, since class definitions may be loaded multiple
7
+ # times here. That reports that constants have already been initialized, which
8
+ # is true, but they are only "re-initialized" with the very same values.
9
+ v, $VERBOSE = $VERBOSE, nil
10
+ load 'lib/biointerchange/core.rb'
11
+ load 'lib/biointerchange/cdao.rb'
12
+ load 'lib/biointerchange/reader.rb'
13
+ load 'lib/biointerchange/model.rb'
14
+ load 'lib/biointerchange/writer.rb'
15
+ load 'lib/biointerchange/phylogenetics/newick_reader.rb'
16
+ load 'lib/biointerchange/phylogenetics/tree_set.rb'
17
+ load 'lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
18
+ $VERBOSE = v
19
+
20
+ describe BioInterchange::Phylogenetics::NewickReader do
21
+ describe 'deserialization of Newick trees' do
22
+ it 'empty document' do
23
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new(''))
24
+ trees = 0
25
+ tree_file.each_entry { |tree| trees += 1 }
26
+ trees.should eq(0)
27
+ end
28
+
29
+ it 'single Newick tree' do
30
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new('(,,(,));'))
31
+ trees = 0
32
+ tree_file.each_entry { |newick_tree|
33
+ trees += 1
34
+ newick_tree.tree.edges.length.should eq(5)
35
+ newick_tree.tree.nodes.length.should eq(6)
36
+ }
37
+ trees.should eq(1)
38
+ end
39
+
40
+ it 'three Newick trees' do
41
+ tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new("(,,(,));\n(A,B,(C,D));\n(A,B,(C,D)E)F;"))
42
+ trees = 0
43
+ tree_file.each_entry { |newick_tree|
44
+ trees += 1
45
+ newick_tree.tree.edges.length.should eq(5)
46
+ newick_tree.tree.nodes.length.should eq(6)
47
+ }
48
+ trees.should eq(3)
49
+ end
50
+
51
+ it 'model consistency' do
52
+ model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
53
+ model.should be_an_instance_of BioInterchange::Phylogenetics::TreeSet
54
+ model.contents.length.should eq(1)
55
+ model.contents.first.edges.length.should eq(6)
56
+ model.contents.first.nodes.length.should eq(7)
57
+ end
58
+ end
59
+ end
60
+
61
+ describe BioInterchange::Phylogenetics::CDAORDFWriter do
62
+ describe 'serialization of tree models' do
63
+ it 'empty document' do
64
+ istream, ostream = IO.pipe
65
+ BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(BioInterchange::Phylogenetics::TreeSet.new())
66
+ ostream.close
67
+ istream.read.lines.count.should eq(0)
68
+ end
69
+
70
+ it 'single Newick tree' do
71
+ istream, ostream = IO.pipe
72
+ model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
73
+ BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(model)
74
+ ostream.close
75
+ istream.read.lines.count.should eq(151)
76
+ end
77
+ end
78
+ end
79
+
@@ -4,7 +4,7 @@
4
4
 
5
5
  <groupId>org.biointerchange</groupId>
6
6
  <artifactId>vocabularies</artifactId>
7
- <version>0.2.2</version>
7
+ <version>1.0.0</version>
8
8
  <packaging>jar</packaging>
9
9
 
10
10
  <name>BioInterchange Vocabularies</name>