biointerchange 0.2.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +269 -19
  3. data/VERSION +1 -1
  4. data/examples/bininda_emonds_mammals.new +1 -0
  5. data/examples/rdfization.rb +17 -0
  6. data/examples/tree1.new +1 -0
  7. data/examples/tree2.new +1 -0
  8. data/examples/vocabulary.rb +26 -5
  9. data/generators/javaify.rb +12 -18
  10. data/generators/make_supplement_releases.rb +2 -0
  11. data/generators/pythonify.rb +21 -8
  12. data/generators/rdfxml.rb +15 -1
  13. data/lib/biointerchange/cdao.rb +2014 -0
  14. data/lib/biointerchange/core.rb +70 -77
  15. data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +16 -0
  16. data/lib/biointerchange/genomics/gff3_reader.rb +18 -4
  17. data/lib/biointerchange/genomics/gvf_reader.rb +14 -0
  18. data/lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb +108 -0
  19. data/lib/biointerchange/phylogenetics/newick_reader.rb +81 -0
  20. data/lib/biointerchange/phylogenetics/tree_set.rb +50 -0
  21. data/lib/biointerchange/registry.rb +50 -8
  22. data/lib/biointerchange/so.rb +150 -0
  23. data/lib/biointerchange/textmining/pdfx_xml_reader.rb +21 -2
  24. data/lib/biointerchange/textmining/pubannos_json_reader.rb +24 -1
  25. data/lib/biointerchange/textmining/text_mining_rdf_ntriples.rb +9 -0
  26. data/lib/biointerchange/textmining/text_mining_reader.rb +5 -5
  27. data/spec/phylogenetics_spec.rb +79 -0
  28. data/supplemental/java/biointerchange/pom.xml +1 -1
  29. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/CDAO.java +2602 -0
  30. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +30 -28
  31. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +136 -104
  32. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +367 -278
  33. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +4388 -3127
  34. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +5970 -4351
  35. data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +733 -544
  36. data/supplemental/java/biointerchange/src/test/java/org/biointerchange/AppTest.java +3 -1
  37. data/supplemental/python/biointerchange/cdao.py +2021 -0
  38. data/supplemental/python/biointerchange/faldo.py +37 -38
  39. data/supplemental/python/biointerchange/gff3o.py +156 -157
  40. data/supplemental/python/biointerchange/goxref.py +172 -172
  41. data/supplemental/python/biointerchange/gvf1o.py +428 -429
  42. data/supplemental/python/biointerchange/sio.py +3133 -3134
  43. data/supplemental/python/biointerchange/so.py +6626 -6527
  44. data/supplemental/python/biointerchange/sofa.py +790 -791
  45. data/supplemental/python/example.py +23 -5
  46. data/supplemental/python/setup.py +2 -2
  47. data/web/about.html +1 -0
  48. data/web/api.html +223 -15
  49. data/web/biointerchange.js +27 -6
  50. data/web/cli.html +8 -3
  51. data/web/index.html +6 -2
  52. data/web/ontologies.html +3 -0
  53. data/web/service/rdfizer.fcgi +7 -15
  54. data/web/webservices.html +6 -2
  55. metadata +30 -3
@@ -1,19 +1,37 @@
1
1
  import biointerchange
2
2
  from biointerchange import *
3
+ from rdflib.namespace import Namespace
4
+
5
+ def print_resource(resource):
6
+ print " " + resource
7
+ print " Ontology class: " + str(GFF3O.is_class(resource))
8
+ print " Ontology object property: " + str(GFF3O.is_object_property(resource))
9
+ print " Ontology datatype property: " + str(GFF3O.is_datatype_property(resource))
3
10
 
4
11
  # Get the URI of an ontology term by label:
5
- GFF3O.seqid()
12
+ print "'seqid' property:"
13
+ print_resource(GFF3O.seqid())
6
14
 
7
15
  # Ambiguous labels will return an array of URIs:
8
16
  # "start" can refer to a sub-property of "feature_properties" or "target_properties"
9
- GFF3O.start()
17
+ print "'start' properties:"
18
+ for start_synonym in GFF3O.start():
19
+ print_resource(start_synonym)
20
+
10
21
  # "feature_properties" can be either a datatype or object property
11
- GFF3O.feature_properties()
22
+ print "'feature_properties' properties:"
23
+ for feature_properties_synonym in GFF3O.feature_properties():
24
+ print_resource(feature_properties_synonym)
12
25
 
13
26
  # Use build-in method "is_datatype_property" to resolve ambiguity:
14
27
  # (Note: there is exactly one item in the result set, so the selection of the first item is acceptable.)
15
- feature_properties = filter(lambda uri: GFF3O.is_datatype_property(uri), GFF3O.feature_properties())[0]
28
+ feature_properties = filter(lambda uri: GFF3O.is_datatype_property(uri), GFF3O.feature_properties())
29
+ print "'feature_properties' properties, which are a datatype property:"
30
+ for feature_property in feature_properties:
31
+ print_resource(feature_property)
16
32
 
17
33
  # Use build-in method "with_parent" to pick properties based on their context:
18
- GFF3O.with_parent(GFF3O.start(), feature_properties)
34
+ print "'start' property with parent datatype property 'feature_properties':"
35
+ for feature_property in GFF3O.with_parent(GFF3O.start(), feature_properties[0]):
36
+ print_resource(feature_property)
19
37
 
@@ -2,8 +2,8 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name = "biointerchange",
5
- version = "0.2.2",
6
- author = "Joachim Baran, Geraint Duck",
5
+ version = "1.0.0",
6
+ author = "Joachim Baran, Kevin B. Cohen, Geraint Duck, Michel Dumontier, Jin-Dong Kim",
7
7
  description = ( "Wrapper for easy access to FALDO, GFF3O, GVF1O, SIO, SO and SOFA.",
8
8
  "Part of the BioInterchange project." ),
9
9
  license = "MIT",
data/web/about.html CHANGED
@@ -50,6 +50,7 @@
50
50
  <div class="span12">
51
51
  <h2>About BioInterchange</h2>
52
52
  <p>BioInterchange was conceived and designed during <a href="http://biosciencedbc.jp">NBDC</a>/<a href="http://dbcls.rois.ac.jp">DBCLS</a>'s <a href="http://2012.biohackathon.org">BioHackathon 2012</a>. Architecture and RDF serialization implementations were provided by <a href="http://joachimbaran.wordpress.com">Joachim Baran</a>, <a href="http://www.cs.man.ac.uk/~duckg">Geraint Duck</a> provided JSON and XML deserialization implementations and contributed to architecture decisions, guidance on ontology use and applications were given by <a href="http://compbio.ucdenver.edu/Hunter_lab/Cohen/index.shtml">Kevin B. Cohen</a> and <a href="http://dumontierlab.com">Michel Dumontier</a>, where Michel brought forward and extended the <a href="http://code.google.com/p/semanticscience/wiki/SIO">Semanticscience Integrated Ontology</a> (SIO). <a href="http://dbcls.rois.ac.jp/~jdkim">Jin-Dong Kim</a> helped to define ontology relationships for RDFizing DBCLS' PubAnnotation category annotations.</p>
53
+ <p><b>Contributors: </b><a href="http://joachimbaran.wordpress.com">Joachim Baran</a>, <a href="http://compbio.ucdenver.edu/Hunter_lab/Cohen/index.shtml">Kevin B. Cohen</a>, <a href="http://www.cs.man.ac.uk/~duckg">Geraint Duck</a>, <a href="http://dumontierlab.com">Michel Dumontier</a>, <a href="http://utah.academia.edu/BDurgahee">Begum Durgahee</a>, and <a href="http://dbcls.rois.ac.jp/~jdkim">Jin-Dong Kim</a></p>
53
54
  <h2>Contact</h2>
54
55
  <p><a href="http://joachimbaran.wordpress.com">Joachim Baran</a><br /><i>joachim.baran</i><b>&#64;</b><i>gmail.com</i></p>
55
56
  </div>
data/web/api.html CHANGED
@@ -55,9 +55,8 @@
55
55
  <pre>
56
56
  sudo gem install biointerchange
57
57
  </pre>
58
- <h4>The BioInterchange Framework</h4>
59
- <em>TODO... sorry.</em>
60
58
  <h4>Vocabulary Wrappers</h4>
59
+ <p>Ruby classes are provided for the ontologies that are used for serializing RDF. Each ontology is represented by its own Ruby class. The classes provide access to the ontology terms and additional methods for resolving OWL classes, datatype properties and object properties.</p>
61
60
  <p>Usage example (see also <a href="https://github.com/BioInterchange/BioInterchange/blob/master/examples/vocabulary.rb">vocabulary.rb</a>):</p>
62
61
  <pre>
63
62
  require 'rubygems'
@@ -65,48 +64,247 @@ require 'biointerchange'
65
64
 
66
65
  include BioInterchange
67
66
 
67
+ def print_resource(resource)
68
+ puts " #{resource}"
69
+ puts " Ontology class: #{GFF3O.is_class?(resource)}"
70
+ puts " Ontology object property: #{GFF3O.is_object_property?(resource)}"
71
+ puts " Ontology datatype property: #{GFF3O.is_datatype_property?(resource)}"
72
+ end
73
+
68
74
  # Get the URI of an ontology term by label:
69
- GFF3O.seqid()
75
+ puts "'seqid' property:"
76
+ print_resource(GFF3O.seqid())
70
77
 
71
78
  # Ambiguous labels will return an array of URIs:
72
79
  # "start" can refer to a sub-property of "feature_properties" or "target_properties"
73
- GFF3O.start()
80
+ puts "'start' properties:"
81
+ GFF3O.start().each { |start_synonym|
82
+ print_resource(start_synonym)
83
+ }
74
84
  # "feature_properties" can be either a datatype or object property
75
- GFF3O.feature_properties()
85
+ puts "'feature_properties' properties:"
86
+ GFF3O.feature_properties().each { |feature_properties_synonym|
87
+ print_resource(feature_properties_synonym)
88
+ }
76
89
 
77
90
  # Use build-in method "is_datatype_property" to resolve ambiguity:
78
91
  # (Note: there is exactly one item in the result set, so the selection of the first item is acceptable.)
79
- feature_properties = GFF3O.feature_properties().select { |uri| GFF3O.is_datatype_property(uri) }[0]
92
+ feature_properties = GFF3O.feature_properties().select { |uri| GFF3O.is_datatype_property?(uri) }
93
+ puts "'feature_properties' properties, which are a datatype property:"
94
+ feature_properties.each { |feature_property|
95
+ print_resource(feature_property)
96
+ }
80
97
 
81
98
  # Use build-in method "with_parent" to pick properties based on their context:
82
- GFF3O.with_parent(GFF3O.start(), feature_properties)
99
+ puts "'start' property with parent datatype property 'feature_properties':"
100
+ GFF3O.with_parent(GFF3O.start(), feature_properties[0]).each { |feature_property|
101
+ print_resource(feature_property)
102
+ }
103
+ </pre>
104
+ <p>With the BioInterchange gem installed, the example can be executed on the command line via:</p>
105
+ <pre>
106
+ git clone git://github.com/BioInterchange/BioInterchange.git
107
+ cd BioInterchange
108
+ git checkout v1.0.0
109
+ ruby examples/vocabulary.rb
110
+ </pre>
111
+ <h4>RDFization Framework</h4>
112
+ <p>Usage example (see also <a href="https://github.com/BioInterchange/BioInterchange/blob/master/examples/rdfization.rb">rdfization.rb</a>):</p>
113
+ <pre>
114
+ require 'rubygems'
115
+ require 'biointerchange'
116
+
117
+ include BioInterchange::Phylogenetics
118
+
119
+ # Create a reader that reads phylogenetic trees in Newick format:
120
+ reader = NewickReader.new()
121
+
122
+ # Create a model from a single example tree:
123
+ # (Note: the `deserialize` method also takes streams as parameter -- not just strings.)
124
+ model = reader.deserialize('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')
125
+
126
+ # Serialize the model as RDF N-Triples to STDOUT:
127
+ CDAORDFWriter.new(STDOUT).serialize(model)
128
+ </pre>
129
+ <h4>Implementing New Readers, Models and Writers</h4>
130
+ <p>New readers, models and writers are best adopted from or built upon the existing implementations. The phylogenetic trinity of Newick file format reader, <a href="http://bioruby.org">BioRuby</a> based tree model, and <a href="http://sourceforge.net/apps/mediawiki/cdao/index.php?title=Main_Page">CDAO</a> RDF writer is used here as a guideline due to its simplicity.</p>
131
+ <h5>Reader: Creating an Object Model</h5>
132
+ <p>The quintessential Newick tree reader is depicted below. Its class is placed in a Ruby module that encapsulates all phylogenetic related source code. The <code>NewickReader</code> class inherits from the BioInterchange framework class <code>Reader</code> that provides method stubs which need to be overwritten. Using the central registry <code>BioInterchange::Registry</code>, the reader informs the framework of its: unique identifier (<code>phylotastic.newick</code>), Ruby class (<code>NewickReader</code>), command line parameters that it accepts (<code>date</code>, which becomes <code>--annotate_date</code>), whether the reader can operate without reading the complete input all at once (<code>true</code>), a descriptive name of the reader (<code>Newick Tree [...]</code>), and an array with descriptions for each parameter stated above.</p>
133
+ <p>Deserialization of Newick trees is done using the <code>deserialize</code> method, which must take both strings and input streams as valid arguments. If this constraint is not satisfied, then an <code>ImplementationReaderError</code> is thrown that is caught by the framework and handled appropriately.</p>
134
+ <p>Finally, the <code>postponed?</code> method keeps track of deferred input processing. If the batch size was reached and the model was passed on for serialization to a writer, then this method will have to return <code>true</code>.</p>
135
+ <pre>
136
+ require 'bio'
137
+
138
+ module BioInterchange::Phylogenetics
139
+
140
+ class NewickReader &lt; BioInterchange::Reader
141
+
142
+ # Register reader:
143
+ BioInterchange::Registry.register_reader(
144
+ 'phylotastic.newick',
145
+ NewickReader,
146
+ [ 'date' ],
147
+ true,
148
+ 'Newick Tree File Format reader',
149
+ [
150
+ [ 'date <date>', 'date when the Newick file was created (optional)' ]
151
+ ]
152
+ )
153
+
154
+ # Creates a new instance of a Newick file format reader.
155
+ #
156
+ # The reader supports batch processing.
157
+ #
158
+ # +date+:: Optional date of when the Newick file was produced, annotated, etc.
159
+ # +batch_size+:: Optional integer that determines the number of features that
160
+ # should be processed in one go.
161
+ def initialize(date = nil, batch_size = nil)
162
+ @date = date
163
+ @batch_size = batch_size
164
+ end
165
+
166
+ # Reads a Newick file from the input stream and returns an associated model.
167
+ #
168
+ # If this method is called when +postponed?+ returns true, then the reading will
169
+ # continue from where it has been interrupted beforehand.
170
+ #
171
+ # +inputstream+:: an instance of class IO or String that holds the contents of a Newick file
172
+ def deserialize(inputstream)
173
+ if inputstream.kind_of?(IO)
174
+ create_model(inputstream)
175
+ elsif inputstream.kind_of?(String) then
176
+ create_model(StringIO.new(inputstream))
177
+ else
178
+ raise BioInterchange::Exceptions::ImplementationReaderError, 'The provided input stream needs to be either of type IO or String.'
179
+ end
180
+ end
181
+
182
+ # Returns true if the reading of the input was postponed due to a full batch.
183
+ def postponed?
184
+ @postponed
185
+ end
186
+
187
+ protected
188
+
189
+ # ...concrete implementation omitted.
190
+ </pre>
191
+ <h5>Tree Model</h5>
192
+ <p>A model is created by a reader and it is subsequently consumed by a writer. The phylogenetic tree model inherits from <code>BioInterchange::Model</code> which defines the <code>prune</code> method. If batch operation is in place, i.e. the input is not completely read into memory, then the <code>prune</code> method will be called to instruct the model to drop all information that no longer has to be kept in memory. In a sense, this can be seen as a form of garbage collection, where data that has been serialized is purged from memory.</p>
193
+ <pre>
194
+ module BioInterchange::Phylogenetics
195
+
196
+ # A phylogenetic tree set that can contain multiple phylogenetic trees.
197
+ class TreeSet &lt; BioInterchange::Model
198
+
199
+ # Create a new instance of a tree set. A tree set can contain multiple phylogenetic trees.
200
+ def initialize
201
+ # Trees are stored as the keys of a hash map to increase performance:
202
+ @set = {}
203
+ end
204
+
205
+ # ...omitted internal data structure handling.
206
+
207
+ # Removes all features from the set, but keeps additional data (e.g., the date).
208
+ def prune
209
+ @set.clear
210
+ end
211
+
212
+ end
213
+
214
+ end
215
+ </pre>
216
+ <h5>Writer: From Object Model to RDF</h5>
217
+ <p>The writer takes an object model and serializes it via the <code>BioInterchange::Writer</code> derived <code>serialize</code> method. A writer uses <code>BioInterchange::Registry</code> to make itself known to the BioInterchange framework, where it signs up using the following arguments: a unique identifier (<code>rdf.phylotastic.newick</code>), its implementing class (<code>CDAORDFWriter</code>), a list of readers that it is compatible with (<code>phylotastic.newick</code>), whether the writer supports batch processing where only parts of the input need to be kept in memory (<code>true</code>), and a descriptive name for the writer.</p>
218
+ <pre>
219
+ require 'rdf'
220
+ require 'rdf/ntriples'
221
+
222
+ module BioInterchange::Phylogenetics
223
+
224
+ # Serialized phylogenetic tree models based on BioRuby's phylogenetic tree implementation.
225
+ class CDAORDFWriter &lt; BioInterchange::Writer
226
+
227
+ # Register writers:
228
+ BioInterchange::Registry.register_writer(
229
+ 'rdf.phylotastic.newick',
230
+ CDAORDFWriter,
231
+ [ 'phylotastic.newick' ],
232
+ true,
233
+ 'Comparative Data Analysis Ontology (CDAO) based RDFization'
234
+ )
235
+
236
+ # Creates a new instance of a CDAORDFWriter that will use the provided output stream to serialize RDF.
237
+ #
238
+ # +ostream+:: instance of an IO class or derivative that is used for RDF serialization
239
+ def initialize(ostream)
240
+ @ostream = ostream
241
+ end
242
+
243
+ # Serialize a model as RDF.
244
+ #
245
+ # +model+:: a generic representation of input data that is an instance of BioInterchange::Phylogenetics::TreeSet
246
+ def serialize(model)
247
+ model.contents.each { |tree|
248
+ serialize_model(model, tree)
249
+ }
250
+ end
251
+
252
+ protected
253
+
254
+ # ...omitted actual serialization implementation.
83
255
  </pre>
84
256
  <h3>Python API</h3>
85
257
  <p>Vocabulary wrappers in Python are available as an egg, that can be installed via <a href="http://pypi.python.org/pypi/setuptools">easy_install</a>:</p>
86
258
  <pre>
87
259
  sudo easy_install rdflib
88
- sudo easy_install http://www.biointerchange.org/eggs/biointerchange-0.2.2-py2.7.egg
260
+ sudo easy_install http://www.biointerchange.org/eggs/biointerchange-1.0.0-py2.7.egg
89
261
  </pre>
90
262
  <p>Usage example (see also <a href="https://github.com/BioInterchange/BioInterchange/blob/master/supplemental/python/example.py">example.py</a>):</p>
91
263
  <pre>
92
264
  import biointerchange
93
265
  from biointerchange import *
266
+ from rdflib.namespace import Namespace
267
+
268
+ def print_resource(resource):
269
+ print " " + resource
270
+ print " Ontology class: " + str(GFF3O.is_class(resource))
271
+ print " Ontology object property: " + str(GFF3O.is_object_property(resource))
272
+ print " Ontology datatype property: " + str(GFF3O.is_datatype_property(resource))
94
273
 
95
274
  # Get the URI of an ontology term by label:
96
- GFF3O.seqid()
275
+ print "'seqid' property:"
276
+ print_resource(GFF3O.seqid())
97
277
 
98
278
  # Ambiguous labels will return an array of URIs:
99
279
  # "start" can refer to a sub-property of "feature_properties" or "target_properties"
100
- GFF3O.start()
280
+ print "'start' properties:"
281
+ for start_synonym in GFF3O.start():
282
+ print_resource(start_synonym)
283
+
101
284
  # "feature_properties" can be either a datatype or object property
102
- GFF3O.feature_properties()
285
+ print "'feature_properties' properties:"
286
+ for feature_properties_synonym in GFF3O.feature_properties():
287
+ print_resource(feature_properties_synonym)
103
288
 
104
289
  # Use build-in method "is_datatype_property" to resolve ambiguity:
105
290
  # (Note: there is exactly one item in the result set, so the selection of the first item is acceptable.)
106
- feature_properties = filter(lambda uri: GFF3O.is_datatype_property(uri), GFF3O.feature_properties())[0]
291
+ feature_properties = filter(lambda uri: GFF3O.is_datatype_property(uri), GFF3O.feature_properties())
292
+ print "'feature_properties' properties, which are a datatype property:"
293
+ for feature_property in feature_properties:
294
+ print_resource(feature_property)
107
295
 
108
296
  # Use build-in method "with_parent" to pick properties based on their context:
109
- GFF3O.with_parent(GFF3O.start(), feature_properties)
297
+ print "'start' property with parent datatype property 'feature_properties':"
298
+ for feature_property in GFF3O.with_parent(GFF3O.start(), feature_properties[0]):
299
+ print_resource(feature_property)
300
+ </pre>
301
+ <p>The example can be executed on the command line via:</p>
302
+ <pre>
303
+ git clone git://github.com/BioInterchange/BioInterchange.git
304
+ cd BioInterchange
305
+ git checkout v1.0.0
306
+ cd supplemental/python
307
+ python example.py
110
308
  </pre>
111
309
  <h3>Java API</h3>
112
310
  <p>Vocabulary wrappers in Java are available as a Maven artifact. Add the following repository and dependency setting to your Project Object Model (POM) file:</p>
@@ -123,7 +321,7 @@ GFF3O.with_parent(GFF3O.start(), feature_properties)
123
321
  &lt;dependency&gt;
124
322
  &lt;groupId&gt;org.biointerchange&lt;/groupId&gt;
125
323
  &lt;artifactId&gt;vocabularies&lt;/artifactId&gt;
126
- &lt;version&gt;0.2.2&lt;/version&gt;
324
+ &lt;version&gt;1.0.0&lt;/version&gt;
127
325
  &lt;/dependency&gt;
128
326
  &lt;/dependencies&gt;
129
327
  </pre>
@@ -188,7 +386,17 @@ public class App
188
386
  }
189
387
  }
190
388
  </pre>
191
-
389
+ <p>Another example that uses SIO instead of GFF3O is provided as <a href="https://github.com/BioInterchange/BioInterchange/blob/master/supplemental/java/biointerchange/src/main/java/org/biointerchange/AppSIO.java">AppSIO.java</a>.</p>
390
+ <p>The examples can be executed through Maven:</p>
391
+ <pre>
392
+ git clone git://github.com/BioInterchange/BioInterchange.git
393
+ cd BioInterchange
394
+ git checkout v1.0.0
395
+ cd supplemental/java/biointerchange
396
+ mvn compile
397
+ mvn exec:java -Dexec.mainClass="org.biointerchange.App"
398
+ mvn exec:java -Dexec.mainClass="org.biointerchange.AppSIO"
399
+ </pre>
192
400
  <hr>
193
401
 
194
402
  <footer>
@@ -5,7 +5,7 @@
5
5
  * '#output': text field to which the RDF output is appended to
6
6
  */
7
7
  function generateRDF() {
8
- if ($('#inputformat').val() == 'biointerchange.gff3' || $('#inputformat').val() == 'biointerchange.gvf' || $('#inputformat').val() == 'dbcls.catanns.json' || $('#inputformat').val() == 'uk.ac.man.pdfx') {
8
+ if ($('#inputformat').val() == 'biointerchange.gff3' || $('#inputformat').val() == 'biointerchange.gvf' || $('#inputformat').val() == 'dbcls.catanns.json' || $('#inputformat').val() == 'phylotastic.newick' || $('#inputformat').val() == 'uk.ac.man.pdfx') {
9
9
  request = '{ "parameters" : "' + escape($('#metainput').val()) + '", "data" : "' + escape($('#maininput').val()) + '" }'
10
10
  $.ajax({
11
11
  type: 'POST',
@@ -54,6 +54,14 @@ function selectDbclsCatannsJson() {
54
54
  outputFormats[i].selected = false;
55
55
  outputFormats[i].disabled = true;
56
56
  }
57
+ } else if ($('#inputformat').val() == 'phylotastic.newick') {
58
+ if (outputFormats[i].value == 'rdf.phylotastic.newick') {
59
+ outputFormats[i].selected = true;
60
+ outputFormats[i].disabled = false;
61
+ } else {
62
+ outputFormats[i].selected = false;
63
+ outputFormats[i].disabled = true;
64
+ }
57
65
  } else if ($('#inputformat').val() == 'uk.ac.man.pdfx') {
58
66
  if (outputFormats[i].value == 'rdf.bh12.sio') {
59
67
  outputFormats[i].selected = true;
@@ -110,11 +118,13 @@ function pasteExample() {
110
118
  "##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/so.obo?revision=1.283\n" +
111
119
  "##data-source Source=ensembl;version=71;url=http://e71.ensembl.org/Saccharomyces_cerevisiae\n" +
112
120
  "##sequence-region I 1 230218\n" +
113
- "I\tSGRP\tSNV\t84\t84\t.\t+\t.\tID=1;Variant_seq=A;Variant_effect=upstream_gene_variant 0 transcript YAL067W-A,upstream_gene_variant 0 transcript YAL069W,upstream_gene_variant 0 transcript YAL068W-A,downstream_gene_variant 0 transcript YAL068C;Dbxref=SGRP:s01-84;Reference_seq=G\n" +
114
- "I\tSGRP\tSNV\t109\t109\t.\t+\t.\tID=2;Variant_seq=C;Variant_effect=upstream_gene_variant 0 transcript YAL067W-A,upstream_gene_variant 0 transcript YAL068W-A,upstream_gene_variant 0 transcript YAL069W,downstream_gene_variant 0 transcript YAL068C;Dbxref=SGRP:s01-109;Reference_seq=G\n" +
115
- "I\tSGRP\tSNV\t111\t111\t.\t+\t.\tID=3;Variant_seq=T;Variant_effect=upstream_gene_variant 0 transcript YAL067W-A,upstream_gene_variant 0 transcript YAL069W,upstream_gene_variant 0 transcript YAL068W-A,downstream_gene_variant 0 transcript YAL068C;Dbxref=SGRP:s01-111;Reference_seq=C\n" +
116
- "I\tSGRP\tSNV\t114\t114\t.\t+\t.\tID=4;Variant_seq=C;Variant_effect=upstream_gene_variant 0 transcript YAL067W-A,upstream_gene_variant 0 transcript YAL068W-A,upstream_gene_variant 0 transcript YAL069W,downstream_gene_variant 0 transcript YAL068C;Dbxref=SGRP:s01-114;Reference_seq=T\n" +
117
- "I\tSGRP\tSNV\t115\t115\t.\t+\t.\tID=5;Variant_seq=G;Variant_effect=upstream_gene_variant 0 transcript YAL067W-A,upstream_gene_variant 0 transcript YAL068W-A,upstream_gene_variant 0 transcript YAL069W,downstream_gene_variant 0 transcript YAL068C;Dbxref=SGRP:s01-115;Reference_seq=C\n"
121
+
122
+ "##gff-version 3\n" +
123
+ "##gvf-version 1.06\n" +
124
+ "##species http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606\n" +
125
+ "##file-date 2012-11-29\n" +
126
+ "##genome-build NCBI NCBI36\n" +
127
+ "Chr1 DGVa copy_number_loss 4320920 4323975 . . . ID=1;Name=essv2585519;variant_call_so_id=SO:0001743;parent=esv275066;Start_range=4320920,4321865;End_range=4323946,4323975;submitter_variant_id=IgH3.4SSV1;samples=Unknown;var_origin=Not tested;Variant_seq=.\n"
118
128
  );
119
129
  } else if ($('#inputformat').val() == 'dbcls.catanns.json') {
120
130
  $('#metainput').val(
@@ -144,6 +154,17 @@ function pasteExample() {
144
154
  " ]\n" +
145
155
  "}\n"
146
156
  );
157
+ } else if ($('#inputformat').val() == 'phylotastic.newick') {
158
+ $('#metainput').val(
159
+ "{\n" +
160
+ " \"input\" : \"phylotastic.newick\",\n" +
161
+ " \"output\" : \"rdf.phylotastic.newick\",\n" +
162
+ " \"date\" : \"2012-07-19\"\n" +
163
+ "}\n"
164
+ );
165
+ $('#maininput').val(
166
+ "((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;\n"
167
+ );
147
168
  } else if ($('#inputformat').val() == 'uk.ac.man.pdfx') {
148
169
  $('#metainput').val(
149
170
  "{\n" +
data/web/cli.html CHANGED
@@ -60,14 +60,17 @@ gem install biointerchange
60
60
  <p>
61
61
  Examples:
62
62
  <pre>
63
- biointerchange --input dbcls.catanns.json --rdf rdf.bh12.sio --file examples/pubannotation.10096561.json --name 'Peter Smith' --name_id 'peter.smith@example.com'
64
- biointerchange --input uk.ac.man.pdfx --rdf rdf.bh12.sio --file examples/gb-2007-8-3-R40.xml --name 'Peter Smith' --name_id 'peter.smith@example.com'
63
+ biointerchange --input biointerchange.gvf --rdf rdf.biointerchange.gvf --batchsize 100 --file examples/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf
64
+ biointerchange --input dbcls.catanns.json --rdf rdf.bh12.sio --file examples/pubannotation.10096561.json --annotate_name 'Peter Smith' --annotate_name_id 'peter.smith@example.com'
65
+ biointerchange --input uk.ac.man.pdfx --rdf rdf.bh12.sio --file examples/gb-2007-8-3-R40.xml --annotate_name 'Peter Smith' --annotate_name_id 'peter.smith@example.com'
66
+ biointerchange --input phylotastic.newick --rdf rdf.phylotastic.newick --file examples/tree2.new --annotate_date '1 June 2006'
65
67
  </pre>
66
68
  Input formats:
67
69
  <ul>
68
70
  <li><code>biointerchange.gff3</code>: <a href="http://www.sequenceontology.org/resources/gff3.html">Generic Feature Format Version 3</a></li>
69
71
  <li><code>biointerchange.gvf</code>: <a href="http://www.sequenceontology.org/resources/gvf.html">Genome Variation Format</a></li>
70
72
  <li><code>dbcls.catanns.json</code>: <a href="http://pubannotation.dbcls.jp">PubAnnotation categorical annotations</a></li>
73
+ <li><code>phylotastic.newick</code>: <a href="http://evolution.genetics.washington.edu/phylip/newicktree.html">Newick tree file format</a></li>
71
74
  <li><code>uk.ac.man.pdfx</code>: <a href="http://pdfx.cs.man.ac.uk">PDFx</a></li>
72
75
  </ul>
73
76
  Output formats:
@@ -75,11 +78,12 @@ biointerchange --input uk.ac.man.pdfx --rdf rdf.bh12.sio --file examples/gb-2007
75
78
  <li><code>rdf.biointerchange.gff3</code>: RDFization of <code>biointerchange.gff3</code></li>
76
79
  <li><code>rdf.biointerchange.gvf</code>: RDFization of <code>biointerchange.gvf</code></li>
77
80
  <li><code>rdf.bh12.sio</code>: RDFization of <code>dbcls.catanns.json</code> or <code>uk.ac.man.pdfx</code></li>
81
+ <li><code>rdf.phylotastic.newick</code>: RDFization of <code>phylotastic.newick</code></li>
78
82
  </ul>
79
83
  </p>
80
84
  <h4>Using a Triple Store</h4>
81
85
  <p>
82
- RDF data produced by BioInterchange can be directly loaded into a triple store. The following gives an example about loading and querying RDF data using <a href="http://www.openrdf.org">Sesame</a>]; the commands are entered via Sesame's <code>bin/console.sh</code>:
86
+ RDF data produced by BioInterchange can be directly loaded into a triple store. The following gives an example about loading and querying RDF data using <a href="http://www.openrdf.org">Sesame</a>; the commands are entered via Sesame's <code>bin/console.sh</code>:
83
87
  <pre>
84
88
  &gt; create memory.
85
89
  Please specify values for the following variables:
@@ -110,6 +114,7 @@ java -d64 -Xmx4G -jar HermiT.jar -k -v merged.xml
110
114
  <p>
111
115
  The following list provides information on the origin of the example-data files in the <code>examples</code> directory:
112
116
  <ul>
117
+ <li><code>bininda_emonds_mammals.new</code>: Newick formatted Bininda-Emonds mammals tree (see <a href="http://www.ncbi.nlm.nih.gov/pubmed/17392779">The delayed rise of present-day mammals</a>). Downloaded from <a href="https://github.com/bendmorris/rdf-treestore/blob/master/trees/bininda_emonds_mammals.new">https://github.com/bendmorris/rdf-treestore/blob/master/trees/bininda_emonds_mammals.new</a></li>
113
118
  <li><code>BovineGenomeChrX.gff3.gz</code>: Gzipped GFF3 file describing a Bos taurus chromosome X. Downloaded from <a href="http://bovinegenome.org/?q=download_chromosome_gff3">http://bovinegenome.org/?q=download_chromosome_gff3</a></li>
114
119
  <li><code>chromosome_BF.gff</code>: GFF3 file of floating contigs from the Baylor Sequencing Centre. Downloaded from <a href="http://dictybase.org/Downloads">http://dictybase.org/Downloads</a></li>
115
120
  <li><code>estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf</code>: GVF file of EBI's <a href="http://www.ebi.ac.uk/dgva/database-genomic-variants-archive">DGVa</a>. Downloaded from <a href="ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf">ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf</a></li>
data/web/index.html CHANGED
@@ -48,7 +48,7 @@
48
48
  <div class="hero-unit">
49
49
  <div style="float: left; margin-right: 24px;"><a href="http://www.biointerchange.org"><img width=150 height=150 src="images/BioInterchange300.png" /></a></div>
50
50
  <h1>BioInterchange</h1>
51
- <p>Interchange data using the Resource Description Framework (RDF) and let BioInterchange automagically create RDF triples from your TSV, XML, GFF3, GVF and other files. BioInterchange helps you transform your data sets into linked data for sharing and data integration via command line, web-service, or API.</p>
51
+ <p>Interchange data using the Resource Description Framework (RDF) and let BioInterchange automagically create RDF triples from your TSV, XML, GFF3, GVF, Newick and other files. BioInterchange helps you transform your data sets into linked data for sharing and data integration via command line, web-service, or API.</p>
52
52
  <!-- <p><a class="btn btn-primary btn-large">Learn more &raquo;</a></p> -->
53
53
  </div>
54
54
 
@@ -56,11 +56,13 @@
56
56
  <div class="span6" style="text-align: justify;">
57
57
  <h2>Shell Tool</h2>
58
58
  <p>Create RDF files from large data sets using the BioInterchange shell tool. It is installed as a Ruby gem that works with Mac OS X, Linux and Windows.</p>
59
+ <p>Check out how the shell tool turns data into RDF using a single command line call, how the data can be loaded into a triple store, and how to use a semantic reasoner to verify your data consistency!</p>
59
60
  <p><a class="btn" href="cli.html">View details &raquo;</a></p>
60
61
  </div>
61
62
  <div class="span6" style="text-align: justify;">
62
63
  <h2>Web-Services</h2>
63
64
  <p>Quickly RDFize small data snippets with BioInterchange's web-service. Either copy/paste your data into an interactive web-site, or use the RESTful web-service.</p>
65
+ <p>Example data is provided for every input format that BioInterchange can turn into RDF. Turn it into RDF with a click of a button!</p>
64
66
  <p><a class="btn" href="webservices.html">View details &raquo;</a></p>
65
67
  </div>
66
68
  </div>
@@ -68,12 +70,14 @@
68
70
  <div class="span6" style="text-align: justify;">
69
71
  <h2>API</h2>
70
72
  <p>Integrate BioInterchange into your Ruby programs as a gem. The small footprint API makes it possible to create RDF from simple data structures in a few lines.</p>
73
+ <p>See how the BioInterchange API framework can be used with Ruby, Python and Java; learn how to implement your own RDF transformation algorithms!</p>
71
74
  <p><a class="btn" href="api.html">View details &raquo;</a></p>
72
75
  <p><a class="btn" href="../doc/BioInterchange.html">View Ruby docs &raquo;</a></p>
73
76
  </div>
74
77
  <div class="span6" style="text-align: justify;">
75
78
  <h2>Ontologies</h2>
76
- <p>Reuse ontologies that we specifically developed to tackle some RDFizations. External ontologies that are used by BioInterchange are listed here too.
79
+ <p>Reuse ontologies that we specifically developed to tackle some RDFizations. External ontologies that are used by BioInterchange are listed here too.</p>
80
+ <p>Incorporate the ontologies into your own projects and get easy programmatic access to them using the BioInterchange vocabulary wrappers!</p>
77
81
  <p><a class="btn" href="ontologies.html">View details &raquo;</a></p>
78
82
  </div>
79
83
  </div>
data/web/ontologies.html CHANGED
@@ -50,6 +50,7 @@
50
50
  <div class="span12">
51
51
  <h2>Ontologies</h2>
52
52
  <p>BioInterchange makes use of external ontologies, i.e. ontologies that were developed outside the scope of the BioInterchange project, as well as ontologies that we designed and implemented for specific file format conversion purposes. We are always happy to hear about ontologies that we should consider incorporating into BioInterchange and contributions of ontologies that permit including more file formats for RDF conversion are most welcome.</p>
53
+ <p>The <b>BioInterchange vocabulary wrappers</b>, which are available for Ruby, Python and Java, are described as part of the <a href="api.html">API</a> documentation.</p>
53
54
  <h3>BioInterchange Ontologies</h3>
54
55
  Some ontologies have been specifically designed and implemented for BioInterchange and are freely available via the <a href="http://creativecommons.org/licenses/by-sa/3.0/">Creative Commons Attribution Share-Alike</a> (<a href="http://creativecommons.org/licenses/by-sa/3.0/">CC BY-SA</a>) license. The listing below gives an overview of the ontologies we created, where more information about them is available on their <a href="http://biointerchange.github.com/Ontologies">GitHub project pages</a>.
55
56
  <h4>Generic Feature Format Version 3 Ontology</h4>
@@ -64,6 +65,8 @@
64
65
  </ul>
65
66
  <h3>External Ontologies</h3>
66
67
  <ul>
68
+ <li><a href="http://sourceforge.net/apps/mediawiki/cdao/index.php?title=Main_Page">Comparative Data Analysis Ontology</a> (CDAO)</li>
69
+ <li><a href="https://github.com/JervenBolleman/FALDO">Feature Annotation Location Description Ontology</a> (FALDO)</li>
67
70
  <li><a href="http://xmlns.com/foaf/spec">Friend of a Friend</a> (FOAF)</li>
68
71
  <li><a href="http://code.google.com/p/semanticscience/wiki/SIO">Semanticscience Integrated Ontology</a> (SIO)</li>
69
72
  </ul>
@@ -13,6 +13,8 @@ require 'rdf/ntriples'
13
13
 
14
14
  # This will be obsolete once BioInterchange has been turned into a gem:
15
15
  load '../../lib/biointerchange/core.rb'
16
+ load '../../lib/biointerchange/registry.rb'
17
+ load '../../lib/biointerchange/cdao.rb'
16
18
  load '../../lib/biointerchange/faldo.rb'
17
19
  load '../../lib/biointerchange/gff3o.rb'
18
20
  load '../../lib/biointerchange/gvf1o.rb'
@@ -38,17 +40,9 @@ load '../../lib/biointerchange/genomics/gvf_feature_set.rb'
38
40
  load '../../lib/biointerchange/genomics/gvf_pragmas.rb'
39
41
  load '../../lib/biointerchange/genomics/gvf_reader.rb'
40
42
  load '../../lib/biointerchange/genomics/gff3_rdf_ntriples.rb'
41
-
42
- input_formats = {}
43
- input_formats['dbcls.catanns.json'] = [ BioInterchange::TextMining::PubannosJsonReader, 'name', 'name_id', 'date', [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ], 'version' ]
44
- input_formats['uk.ac.man.pdfx'] = [ BioInterchange::TextMining::PdfxXmlReader, 'name', 'name_id', 'date', [ Proc.new { |*args| BioInterchange::TextMining::TMReader::determine_process(*args) }, 'name_id' ], 'version' ]
45
- input_formats['biointerchange.gff3'] = [ BioInterchange::Genomics::GFF3Reader, 'name', 'name_uri', 'date' ]
46
- input_formats['biointerchange.gvf'] = [ BioInterchange::Genomics::GVFReader, 'name', 'name_uri', 'date' ]
47
-
48
- output_formats = {}
49
- output_formats['rdf.bh12.sio'] = BioInterchange::TextMining::RDFWriter
50
- output_formats['rdf.biointerchange.gff3'] = BioInterchange::Genomics::RDFWriter
51
- output_formats['rdf.biointerchange.gvf'] = BioInterchange::Genomics::RDFWriter
43
+ load '../../lib/biointerchange/phylogenetics/tree_set.rb'
44
+ load '../../lib/biointerchange/phylogenetics/newick_reader.rb'
45
+ load '../../lib/biointerchange/phylogenetics/cdao_rdf_ntriples.rb'
52
46
 
53
47
  FCGI.each { |fcgi|
54
48
  request = fcgi.in.read
@@ -63,18 +57,16 @@ FCGI.each { |fcgi|
63
57
  data = URI.decode(request['data'])
64
58
 
65
59
  raise ArgumentError, 'An input format must be given in the meta-data using the key "input".' unless parameters['input']
66
- raise ArgumentError, "Unknown input format \"#{parameters['input']}\"." unless input_formats[parameters['input']]
67
60
  raise ArgumentError, 'An output format must be given in the meta-data using the key "output".' unless parameters['output']
68
- raise ArgumentError, "Unknown output format \"#{parameters['output']}\"." unless output_formats[parameters['output']]
69
61
 
70
- reader_class, *args = input_formats[parameters['input']]
62
+ reader_class, *args = BioInterchange::Registry.reader(parameters['input'])
71
63
  reader = reader_class.new(*BioInterchange::get_parameters(parameters, args))
72
64
  istream, ostream = IO.pipe
73
65
  ostream.print(data)
74
66
  ostream.close
75
67
  model = reader.deserialize(istream)
76
68
  istream, ostream = IO.pipe
77
- output_formats[parameters['output']].new(ostream).serialize(model)
69
+ BioInterchange::Registry.writer(parameters['output']).new(ostream).serialize(model)
78
70
  ostream.close
79
71
  fcgi.out.print(istream.read)
80
72
  rescue => e
data/web/webservices.html CHANGED
@@ -64,14 +64,16 @@
64
64
  <option value="dbcls.catanns.json" selected>DBCLS PubAnnotation's Category Annotations (catanns) JSON</option>
65
65
  <option value="biointerchange.gff3">GFF3</option>
66
66
  <option value="biointerchange.gvf">GVF</option>
67
+ <option value="phylotastic.newick">Newick</option>
67
68
  <option value="uk.ac.man.pdfx">PDFx XML</option>
68
69
  </select><br />
69
70
  Select the RDF serialization method (output format):<br />
70
71
  <span class="muted">Not all output formats are supported for a given input format. Invalid choices are disabled automatically here.</span><br />
71
72
  <select id="outputformat" style="min-width: 50%;">
72
- <option id="output.rdf.bh12.sio" value="rdf.bh12.sio" selected>RDF using SIO and FOAF ontologies</option>
73
+ <option id="output.rdf.phylotastic.newick" value="rdf.phylotastic.newick" disabled>RDF using CDAO ontology</option>
73
74
  <option id="output.rdf.biointerchange.gff3" value="rdf.biointerchange.gff3" disabled>RDF using GFF3O ontology</option>
74
75
  <option id="output.rdf.biointerchange.gvf" value="rdf.biointerchange.gvf" disabled>RDF using GVF1O ontology</option>
76
+ <option id="output.rdf.bh12.sio" value="rdf.bh12.sio" selected>RDF using SIO and FOAF ontologies</option>
75
77
  </select><br />
76
78
  <a class="btn btn-info" onclick="pasteExample();">Paste Input-Specific Example</a>
77
79
  </p>
@@ -107,6 +109,7 @@
107
109
  <li><code>biointerchange.gff3</code>: <a href="http://www.sequenceontology.org/resources/gff3.html">Generic Feature Format Version 3</a></li>
108
110
  <li><code>biointerchange.gvf</code>: <a href="http://www.sequenceontology.org/resources/gvf.html">Genome Variation Format</a></li>
109
111
  <li><code>dbcls.catanns.json</code>: <a href="http://pubannotation.dbcls.jp">PubAnnotation categorical annotations</a></li>
112
+ <li><code>phylotastic.newick</code>: <a href="http://evolution.genetics.washington.edu/phylip/newicktree.html">Newick tree file format</a></li>
110
113
  <li><code>uk.ac.man.pdfx</code>: <a href="http://pdfx.cs.man.ac.uk">PDFx</a></li>
111
114
  </ul>
112
115
  <li><code>OUTPUT_METHOD</code>: determines the RDFization method that should be used; output will always be RDF N-Triples. Available output formats are:</li>
@@ -114,13 +117,14 @@
114
117
  <li><code>rdf.biointerchange.gff3</code>: RDFization of <code>biointerchange.gff3</code></li>
115
118
  <li><code>rdf.biointerchange.gvf</code>: RDFization of <code>biointerchange.gvf</code></li>
116
119
  <li><code>rdf.bh12.sio</code>: RDFization of <code>dbcls.catanns.json</code> or <code>uk.ac.man.pdfx</code></li>
120
+ <li><code>rdf.phylotastic.newick</code>: RDFization of <code>phylotastic.newick</code></li>
117
121
  </ul>
118
122
  <li><code>URL_ENCODED_DATA</code>: data for RDFization as <a href="http://en.wikipedia.org/wiki/Percent-encoding">URL encoded</a> string</li>
119
123
  </ul>
120
124
  </p>
121
125
  <h4>Example</h4>
122
126
  <p>
123
- A query example is part of BioInterchange's source repository. The file [webservice_example.json](https://raw.github.com/BioInterchange/BioInterchange/master/examples/webservice_example.json) contains the following query:
127
+ A query example is part of BioInterchange's source repository. The file <a href="https://raw.github.com/BioInterchange/BioInterchange/master/examples/webservice_example.json">webservice_example.json</a> contains the following query:
124
128
 
125
129
  <pre>
126
130
  {