RubyGems - biointerchange - Versions diffs - 0.2.2 → 1.0.0 - Mend

biointerchange 0.2.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

data/Gemfile +1 -0
data/README.md +269 -19
data/VERSION +1 -1
data/examples/bininda_emonds_mammals.new +1 -0
data/examples/rdfization.rb +17 -0
data/examples/tree1.new +1 -0
data/examples/tree2.new +1 -0
data/examples/vocabulary.rb +26 -5
data/generators/javaify.rb +12 -18
data/generators/make_supplement_releases.rb +2 -0
data/generators/pythonify.rb +21 -8
data/generators/rdfxml.rb +15 -1
data/lib/biointerchange/cdao.rb +2014 -0
data/lib/biointerchange/core.rb +70 -77
data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
data/lib/biointerchange/registry.rb +50 -8
data/lib/biointerchange/so.rb +150 -0
data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
data/spec/phylogenetics_spec.rb +79 -0
data/supplemental/java/biointerchange/pom.xml +1 -1
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
data/supplemental/python/biointerchange/cdao.py +2021 -0
data/supplemental/python/biointerchange/faldo.py +37 -38
data/supplemental/python/biointerchange/gff3o.py +156 -157
data/supplemental/python/biointerchange/goxref.py +172 -172
data/supplemental/python/biointerchange/gvf1o.py +428 -429
data/supplemental/python/biointerchange/sio.py +3133 -3134
data/supplemental/python/biointerchange/so.py +6626 -6527
data/supplemental/python/biointerchange/sofa.py +790 -791
data/supplemental/python/example.py +23 -5
data/supplemental/python/setup.py +2 -2
data/web/about.html +1 -0
data/web/api.html +223 -15
data/web/biointerchange.js +27 -6
data/web/cli.html +8 -3
data/web/index.html +6 -2
data/web/ontologies.html +3 -0
data/web/service/rdfizer.fcgi +7 -15
data/web/webservices.html +6 -2
metadata +30 -3

data/lib/biointerchange/so.rb CHANGED Viewed

@@ -13114,6 +13114,156 @@ class SO
     if uri == RDF::URI.new('http://purl.obolibrary.org/obo/SEQUENCE_variant_of') then
       return true
     end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/adjacent_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/associated_with') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/complete_evidence_for_feature') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/connects_on') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contained_by') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/contains') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/derives_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/disconnected_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/edited_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/evidence_for_feature') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/exemplar_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finished_by') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/finishes') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/gained') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/genome_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guided_by') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/guides') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_integral_part') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_origin') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_part') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/has_quality') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/homologous_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/integral_part_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/is_consecutive_sequence_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/lost') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/maximally_overlaps') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/member_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/non_functional_homolog_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/orthologous_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/overlaps') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/paralogous_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/part_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/partial_evidence_for_feature') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/position_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/processed_into') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/recombined_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/sequence_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/similar_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/started_by') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/starts') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/trans_spliced_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_from') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/transcribed_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translates_to') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/translation_of') then
+      return true
+    end
+    if uri == RDF::URI.new('http://purl.obolibrary.org/obo/variant_of') then
+      return true
+    end
     return false
   end

data/lib/biointerchange/textmining/pdfx_xml_reader.rb CHANGED Viewed

@@ -5,6 +5,27 @@ require 'rexml/streamlistener'
 class PDFxXMLReader < BioInterchange::TextMining::TMReader
+  # Register reader:
+  BioInterchange::Registry.register_reader(
+    'uk.ac.man.pdfx',
+    PDFxXMLReader,
+    [
+      'name',
+      'name_id',
+      'date',
+      [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
+      'version'
+    ],
+    false,
+    'PDFx XML reader',
+    [
+      [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
+      [ 'version <version>', 'version number of resource (optional)' ],
+      [ 'name <name>', 'name of the GFF3 file creator (required)' ],
+      [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
+    ]
+  )
   # Reads input stream and returns associated +BioInterchange::TextMining::Document+ model
   #
   # Presently I assume a single document per xml file,
@@ -14,8 +35,6 @@ class PDFxXMLReader < BioInterchange::TextMining::TMReader
   #
   # +inputstream+:: Input IO stream to deserialize
   def deserialize(inputstream)
-    #super(inputstream)
     raise BioInterchange::Exceptions::ImplementationReaderError, 'InputStream not of type IO, cannot read.' unless inputstream.kind_of?(IO) or inputstream.kind_of?(String)
     @input = inputstream

data/lib/biointerchange/textmining/pubannos_json_reader.rb CHANGED Viewed

@@ -5,13 +5,36 @@ require 'json'
 class PubAnnosJSONReader < BioInterchange::TextMining::TMReader
+  # Register reader:
+  BioInterchange::Registry.register_reader(
+    'dbcls.catanns.json',
+    PubAnnosJSONReader,
+    [
+      'name',
+      'name_id',
+      'date',
+      [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ],
+      'version'
+    ],
+    false,
+    'PDFx XML reader',
+    [
+      [ 'date <date>', 'date when the GFF3 file was created (optional)' ],
+      [ 'version <version>', 'version number of resource (optional)' ],
+      [ 'name <name>', 'name of the GFF3 file creator (required)' ],
+      [ 'name_id <id>', 'email address of the GFF3 file creator (required)' ]
+    ]
+  )
+  # Deserialize a PubAnnotations JSON object.
+  #
+  # +inputstream+:: Input IO stream to deserialize
   def deserialize(inputstream)
     if inputstream.kind_of?(IO) then
       pubannos(inputstream.read)
     elsif inputstream.kind_of?(String) then
       pubannos(inputstream)
     else
-      #else raise exception
       super(inputstream)
     end
   end

data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb CHANGED Viewed

@@ -6,6 +6,15 @@ module BioInterchange::TextMining
 class RDFWriter < BioInterchange::Writer
+  # Register writers:
+  BioInterchange::Registry.register_writer(
+    'rdf.bh12.sio',
+    RDFWriter,
+    [ 'dbcls.catanns.json', 'uk.ac.man.pdfx' ],
+    false,
+    'Semanticscience Integrated Ontology (SIO) based text-mining RDFization'
+  )
   # Creates a new instance of a RDFWriter that will use the provided output stream to serialize RDF.
   #
   # +ostream+:: instance of an IO class or derivative that is used for RDF serialization

data/lib/biointerchange/textmining/text_mining_reader.rb CHANGED Viewed

@@ -5,16 +5,16 @@ class TMReader < BioInterchange::Reader
   # Create a new instance of a text-mining data reader. Sets @process to a new +BioInterchange::TextMining::Process+ object.
   #
   # +name+:: Name of the process which generated this data
-  # +name_uri+:: URI of the resource that generated this data
+  # +name_id+:: URI of the resource that generated this data
   # +date+:: Optional date of data creation
   # +processtype+:: Type of process that created this content
   # +version+:: Optional version number of resource that created this data (nil if manually curated, for example).
-  def initialize(name, name_uri, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
+  def initialize(name, name_id, date = nil, processtype = BioInterchange::TextMining::Process::UNSPECIFIED, version = nil)
+    raise ArgumentError, 'Require "name" and "name_id" options to specify source of annotations (e.g., a manual annotators name, or software tool name) and their associated URI (e.g., email address, or webaddress).' unless name and name_id
     metadata = {}
     metadata[BioInterchange::TextMining::Process::VERSION] = version
-    @process = BioInterchange::TextMining::Process.new(name, name_uri, processtype, metadata, date)
+    @process = BioInterchange::TextMining::Process.new(name, name_id, processtype, metadata, date)
   end

data/spec/phylogenetics_spec.rb ADDED Viewed

@@ -0,0 +1,79 @@
+require 'rubygems'
+require 'rspec'
+require 'bio'
+# Turn off verbose reporting here, since class definitions may be loaded multiple
+# times. Ruby would otherwise warn that constants have already been initialized, which
+# is true, but they are only "re-initialized" with the very same values.
+v, $VERBOSE = $VERBOSE, nil
+load 'lib/biointerchange/core.rb'
+load 'lib/biointerchange/cdao.rb'
+load 'lib/biointerchange/reader.rb'
+load 'lib/biointerchange/model.rb'
+load 'lib/biointerchange/writer.rb'
+load 'lib/biointerchange/phylogenetics/newick_reader.rb'
+load 'lib/biointerchange/phylogenetics/tree_set.rb'
+load 'lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
+$VERBOSE = v
+describe BioInterchange::Phylogenetics::NewickReader do
+  describe 'deserialization of Newick trees' do
+    it 'empty document' do
+      tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new(''))
+      trees = 0
+      tree_file.each_entry { |tree| trees += 1 }
+      trees.should eq(0)
+    end
+    it 'single Newick tree' do
+      tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new('(,,(,));'))
+      trees = 0
+      tree_file.each_entry { |newick_tree|
+        trees += 1
+        newick_tree.tree.edges.length.should eq(5)
+        newick_tree.tree.nodes.length.should eq(6)
+      }
+      trees.should eq(1)
+    end
+    it 'three Newick trees' do
+      tree_file = Bio::FlatFile.open(Bio::Newick, StringIO.new("(,,(,));\n(A,B,(C,D));\n(A,B,(C,D)E)F;"))
+      trees = 0
+      tree_file.each_entry { |newick_tree|
+        trees += 1
+        newick_tree.tree.edges.length.should eq(5)
+        newick_tree.tree.nodes.length.should eq(6)
+      }
+      trees.should eq(3)
+    end
+    it 'model consistency' do
+      model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
+      model.should be_an_instance_of BioInterchange::Phylogenetics::TreeSet
+      model.contents.length.should eq(1)
+      model.contents.first.edges.length.should eq(6)
+      model.contents.first.nodes.length.should eq(7)
+    end
+  end
+end
+describe BioInterchange::Phylogenetics::CDAORDFWriter do
+  describe 'serialization of tree models' do
+    it 'empty document' do
+      istream, ostream = IO.pipe
+      BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(BioInterchange::Phylogenetics::TreeSet.new())
+      ostream.close
+      istream.read.lines.count.should eq(0)
+    end
+    it 'single Newick tree' do
+      istream, ostream = IO.pipe
+      model = BioInterchange::Phylogenetics::NewickReader.new().deserialize('(A,B,(C,D,E)F)G;')
+      BioInterchange::Phylogenetics::CDAORDFWriter.new(ostream).serialize(model)
+      ostream.close
+      istream.read.lines.count.should eq(151)
+    end
+  end
+end

data/supplemental/java/biointerchange/pom.xml CHANGED Viewed

@@ -4,7 +4,7 @@
   <groupId>org.biointerchange</groupId>
   <artifactId>vocabularies</artifactId>
-  <version>0.2.2</version>
+  <version>1.0.0</version>
   <packaging>jar</packaging>
   <name>BioInterchange Vocabularies</name>