biointerchange 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +61 -18
- data/VERSION +1 -1
- data/examples/Saccharomyces_cerevisiae_incl_consequences.gvf.gz +0 -0
- data/examples/webservice_example.json +7 -0
- data/generators/GOxrefify.rb +36 -28
- data/generators/javaify.rb +131 -112
- data/generators/make_supplement_releases.rb +57 -0
- data/generators/pythonify.rb +68 -53
- data/lib/biointerchange/core.rb +4 -2
- data/lib/biointerchange/faldo.rb +160 -0
- data/lib/biointerchange/genomics/gff3_feature_set.rb +1 -1
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +1 -1
- data/lib/biointerchange/genomics/gff3_reader.rb +1 -1
- data/lib/biointerchange/model.rb +21 -0
- data/lib/biointerchange/registry.rb +1 -1
- data/lib/biointerchange/sio.rb +2035 -57
- data/lib/biointerchange/textmining/document.rb +1 -1
- data/lib/biointerchange/textmining/pdfx_xml_reader.rb +1 -15
- data/lib/biointerchange/textmining/pubannos_json_reader.rb +1 -3
- data/spec/gff3_rdfwriter_spec.rb +1 -0
- data/spec/gvf_rdfwriter_spec.rb +1 -0
- data/spec/text_mining_pdfx_xml_reader_spec.rb +4 -3
- data/spec/text_mining_pubannos_json_reader_spec.rb +6 -5
- data/spec/text_mining_rdfwriter_spec.rb +2 -1
- data/supplemental/java/biointerchange/pom.xml +1 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +219 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GOXRef.java +1221 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +2283 -15
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +5 -4
- data/supplemental/python/biointerchange/faldo.py +168 -0
- data/supplemental/python/biointerchange/gff3o.py +6 -4
- data/supplemental/python/biointerchange/goxref.py +1040 -0
- data/supplemental/python/biointerchange/gvf1o.py +6 -4
- data/supplemental/python/biointerchange/sio.py +1740 -21
- data/supplemental/python/biointerchange/so.py +6527 -6525
- data/supplemental/python/biointerchange/sofa.py +792 -790
- data/supplemental/python/setup.py +2 -2
- data/web/about.html +9 -29
- data/web/api.html +10 -30
- data/web/biointerchange.js +78 -27
- data/web/cli.html +137 -0
- data/web/index.html +19 -34
- data/web/ontologies.html +9 -29
- data/web/service/rdfizer.fcgi +19 -2
- data/web/webservices.html +70 -35
- metadata +13 -3
data/README.md
CHANGED
@@ -7,6 +7,8 @@ BioInterchange is a tool for generating interchangable RDF data from non-RDF dat
|
|
7
7
|
|
8
8
|
Supported input file formats (see examples directory):
|
9
9
|
|
10
|
+
* [GFF3](http://www.sequenceontology.org/resources/gff3.html)
|
11
|
+
* [GVF](http://www.sequenceontology.org/resources/gvf.html)
|
10
12
|
* [Pubannos JSON](http://pubannotation.dbcls.jp/)
|
11
13
|
* [PDFx XML](http://pdfx.cs.man.ac.uk/)
|
12
14
|
|
@@ -19,6 +21,7 @@ Ontologies used in the RDF output:
|
|
19
21
|
* [Generic Feature Format Version 3 Ontology](http://www.biointerchange.org/ontologies.html) (GFF3O)
|
20
22
|
* [Genome Variation Format Version 1 Ontology](http://www.biointerchange.org/ontologies.html) (GVF1O)
|
21
23
|
* [Semanticscience Integrated Ontology](http://code.google.com/p/semanticscience/wiki/SIO) (SIO)
|
24
|
+
* [Sequence Ontology](http://www.sequenceontology.org/index.html) (SO)
|
22
25
|
* [Sequence Ontology Feature Annotation](http://www.sequenceontology.org/index.html) (SOFA)
|
23
26
|
|
24
27
|
#### Contributing
|
@@ -96,7 +99,8 @@ The following list provides information on the origin of the example-data files
|
|
96
99
|
* `BovineGenomeChrX.gff3.gz`: Gzipped GFF3 file describing a Bos taurus chromosome X. Downloaded from [http://bovinegenome.org/?q=download_chromosome_gff3](http://bovinegenome.org/?q=download_chromosome_gff3)
|
97
100
|
* `chromosome_BF.gff`: GFF3 file of floating contigs from the Baylor Sequencing Centre. Downloaded from [http://dictybase.org/Downloads](http://dictybase.org/Downloads)
|
98
101
|
* `estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf`: GVF file of EBI's [DGVa](http://www.ebi.ac.uk/dgva/database-genomic-variants-archive). Downloaded from [ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf](ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf)
|
99
|
-
* `gb-2007-8-3-R40.xml`: Generated by [
|
102
|
+
* `gb-2007-8-3-R40.xml`: Generated by [PDFx](http://pdfx.cs.man.ac.uk) from open-access source PDF [Sense-antisense pairs in mammals: functional and evolutionary considerations](http://genomebiology.com/content/pdf/gb-2007-8-3-r40.pdf)
|
103
|
+
* `Saccharomyces_cerevisiae_incl_consequences.gvf.gz`: Downloaded from [ftp://ftp.ensembl.org/pub/release-71/variation/gvf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae_incl_consequences.gvf.gz](ftp://ftp.ensembl.org/pub/release-71/variation/gvf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae_incl_consequences.gvf.gz)
|
100
104
|
|
101
105
|
### Application Programming Interface
|
102
106
|
|
@@ -118,7 +122,7 @@ BioInterchange available.
|
|
118
122
|
To install the BioInterchange egg, run:
|
119
123
|
|
120
124
|
sudo easy_install rdflib
|
121
|
-
sudo easy_install http://www.biointerchange.org/eggs/biointerchange-0.
|
125
|
+
sudo easy_install http://www.biointerchange.org/eggs/biointerchange-0.2.2-py2.7.egg
|
122
126
|
|
123
127
|
Usage examples:
|
124
128
|
|
@@ -159,7 +163,7 @@ To use the BioInterchange artifact, set-up add the following to your Maven POM f
|
|
159
163
|
<dependency>
|
160
164
|
<groupId>org.biointerchange</groupId>
|
161
165
|
<artifactId>vocabularies</artifactId>
|
162
|
-
<version>0.
|
166
|
+
<version>0.2.2</version>
|
163
167
|
</dependency>
|
164
168
|
</dependencies>
|
165
169
|
|
@@ -231,11 +235,55 @@ Usage examples of accessing GFF3O's vocabulary:
|
|
231
235
|
|
232
236
|
### RESTful Web-Service
|
233
237
|
|
234
|
-
|
238
|
+
A RESTful web-service is available via the URI: [http://www.biointerchange.org/service/rdfizer.biocgi](http://www.biointerchange.org/service/rdfizer.biocgi)
|
239
|
+
|
240
|
+
RDFization parameters and data are sent as a single HTTP POST request containing a JSON object. The JSON object has to be formatted as follows:
|
241
|
+
|
242
|
+
{
|
243
|
+
"parameters" : {
|
244
|
+
"input" : "INPUT_FORMAT",
|
245
|
+
"output": "OUTPUT_METHOD"
|
246
|
+
},
|
247
|
+
"data" : "URL_ENCODED_DATA"
|
248
|
+
}
|
249
|
+
|
250
|
+
* `INPUT_FORMAT`: determines the input data type; available input formats are
|
251
|
+
* `biointerchange.gff3`: [Generic Feature Format Version 3](http://www.sequenceontology.org/resources/gff3.html)
|
252
|
+
* `biointerchange.gvf`: [Genome Variation Format](http://www.sequenceontology.org/resources/gvf.html)
|
253
|
+
* `dbcls.catanns.json`: [PubAnnotation categorical annotations](http://pubannotation.dbcls.jp) JSON
|
254
|
+
* `uk.ac.man.pdfx`: [PDFx](http://pdfx.cs.man.ac.uk) XML
|
255
|
+
* `OUTPUT_METHOD`: determines the RDFization method that should be used (the output will always be RDF N-Triples); available output formats are
|
256
|
+
* `rdf.biointerchange.gff3`: RDFization of `biointerchange.gff3`
|
257
|
+
* `rdf.biointerchange.gvf`: RDFization of `biointerchange.gvf`
|
258
|
+
* `rdf.bh12.sio`: RDFization of `dbcls.catanns.json` or `uk.ac.man.pdfx`
|
259
|
+
* `URL_ENCODED_DATA`: data for RDFization as [URL encoded](http://en.wikipedia.org/wiki/Percent-encoding) string
|
260
|
+
|
261
|
+
#### Example
|
262
|
+
|
263
|
+
A query example is part of BioInterchange's source repository. The file [webservice_example.json](https://raw.github.com/BioInterchange/BioInterchange/master/examples/webservice_example.json) contains the following query:
|
264
|
+
|
265
|
+
{
|
266
|
+
"parameters" : {
|
267
|
+
"input" : "biointerchange.gff3",
|
268
|
+
"output": "rdf.biointerchange.gff3"
|
269
|
+
},
|
270
|
+
"data" : "ChrX.38%09bovine_complete_cds_gmap_perfect%09gene%0915870%0916254%09.%09+%09.%09ID%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09mRNA%0915870%0916254%09.%09+%09.%09ID%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%3BParent%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09CDS%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09exon%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0A"
|
271
|
+
}
|
272
|
+
|
273
|
+
The query can be run using the popular [cURL](http://en.wikipedia.org/wiki/CURL) tool:
|
274
|
+
|
275
|
+
curl -d '@webservice_example.json' http://www.biointerchange.org/service/rdfizer.biocgi
|
235
276
|
|
236
277
|
### Interactive Web-Site
|
237
278
|
|
238
|
-
|
279
|
+
BioInterchange has an [interactive web-interface](http://www.biointerchange.org/webservices.html) for RDFizing small amounts of data. Each input format and RDF serialization method pair comes with an example, which can be used as guidance or as a test bed for learning how to use BioInterchange.
|
280
|
+
|
281
|
+
#### Usage Instructions
|
282
|
+
|
283
|
+
1. select a data input format (for example, GFF3)
|
284
|
+
2. select an RDF serialization method/output format (for example, "RDF using GFF3O ontology")
|
285
|
+
3. paste RDF serialization method parameters and data in the text fields (or, click "Paste Input-Specific Example")
|
286
|
+
4. click "Generate RDF" and the RDFized data will appear below
|
239
287
|
|
240
288
|
Build Notes
|
241
289
|
-----------
|
@@ -262,10 +310,15 @@ The last step, `bundle`, will install gem dependencies of BioInterchange automat
|
|
262
310
|
|
263
311
|
### Building Vocabulary Classes
|
264
312
|
|
265
|
-
Building a new version of the Ruby vocabulary classes for
|
313
|
+
Building a new version of the Ruby vocabulary classes for FALDO, GFF3O, GVF1O, SIO, and SOFA (requires that the OBO files are saved as RDF/XML using [Protege](http://protege.stanford.edu); Apache [Jena](http://jena.apache.org)'s `rdfcat` tool is required to reformat RDF Turtle as RDF/XML):
|
266
314
|
|
267
315
|
sudo gem install rdf
|
268
316
|
sudo gem install rdf-rdfxml
|
317
|
+
echo -e "require 'rdf'\nmodule BioInterchange\n" > lib/biointerchange/faldo.rb
|
318
|
+
rdfcat -ttl <path-to-turtle-version-of-faldo> > faldo.xml.tmp
|
319
|
+
ruby generators/rdfxml.rb faldo.xml.tmp FALDO >> lib/biointerchange/faldo.rb
|
320
|
+
rm -f faldo.xml.tmp
|
321
|
+
echo -e "\nend" >> lib/biointerchange/faldo.rb
|
269
322
|
echo -e "require 'rdf'\nmodule BioInterchange\n" > lib/biointerchange/gff3o.rb
|
270
323
|
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gff3o> GFF3O >> lib/biointerchange/gff3o.rb
|
271
324
|
echo -e "\nend" >> lib/biointerchange/gff3o.rb
|
@@ -292,12 +345,7 @@ A Geno Ontology external reference (GOxref) vocabulary can be created by directl
|
|
292
345
|
|
293
346
|
The source-code generation can be skipped, if none of the ontologies that are used by BioInterchange have been changed. Otherwise, the existing Python vocabulary class wrappers can be generated as follows:
|
294
347
|
|
295
|
-
ruby generators/
|
296
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/pythonify.rb > supplemental/python/biointerchange/gvf1o.py
|
297
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/pythonify.rb > supplemental/python/biointerchange/sio.py
|
298
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-so> SO | ruby generators/pythonify.rb > supplemental/python/biointerchange/so.py
|
299
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/pythonify.rb > supplemental/python/biointerchange/sofa.py
|
300
|
-
curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/pythonify.rb > supplemental/python/biointerchange/goxref.py
|
348
|
+
ruby generators/make_supplement_releases.rb
|
301
349
|
|
302
350
|
Generate the BioInterchange Python vocabulary egg:
|
303
351
|
|
@@ -314,12 +362,7 @@ The vocabulary wrapper makes used of RDFLib, which does not install automaticall
|
|
314
362
|
|
315
363
|
The source-code generation can be skipped, if none of the ontologies that are used by BioInterchange have been changed. Otherwise, the existing Java vocabulary class wrappers can be generated as follows:
|
316
364
|
|
317
|
-
ruby generators/
|
318
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java
|
319
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java
|
320
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-so> SO | ruby generators/javaify.rb "http://purl.obolibrary.org/obo/" > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java
|
321
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/javaify.rb "http://purl.obolibrary.org/obo/" > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java
|
322
|
-
curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GOXRef.java
|
365
|
+
ruby generators/make_supplement_releases.rb
|
323
366
|
|
324
367
|
Generate the BioInterchange Java vocabulary artifact:
|
325
368
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.2
|
Binary file
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"parameters" : {
|
3
|
+
"input" : "biointerchange.gff3",
|
4
|
+
"output": "rdf.biointerchange.gff3"
|
5
|
+
},
|
6
|
+
"data" : "ChrX.38%09bovine_complete_cds_gmap_perfect%09gene%0915870%0916254%09.%09+%09.%09ID%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09mRNA%0915870%0916254%09.%09+%09.%09ID%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%3BParent%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09CDS%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09exon%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0A"
|
7
|
+
}
|
data/generators/GOxrefify.rb
CHANGED
@@ -1,41 +1,49 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
3
|
def record(id, description, uri)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
cls = " # Returns the link-out URI for objects of \"#{description}\".\n"
|
5
|
+
cls << " def self.#{id}\n"
|
6
|
+
cls << " RDF::URI.new(\"#{uri}\")\n"
|
7
|
+
cls << " end\n\n"
|
8
|
+
|
9
|
+
cls
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
12
|
+
def goxrefify(ontology)
|
13
|
+
cls = "class GOXRef\n\n"
|
14
|
+
|
15
|
+
in_record = false
|
13
16
|
|
14
|
-
|
17
|
+
id = nil
|
18
|
+
description = nil
|
19
|
+
uri = nil
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
uri = nil
|
21
|
+
ontology.each { |line|
|
22
|
+
line.chomp!
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
if line.empty? then
|
25
|
+
record(id, description, uri) if uri and not uri.match(/\[.*\]/)
|
26
|
+
uri = nil
|
27
|
+
in_record = false
|
28
|
+
end
|
22
29
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
30
|
+
if line.start_with?('abbreviation:') and not in_record then
|
31
|
+
id = line.sub(/^abbreviation: /, '').gsub(/[-\/]/, '_')
|
32
|
+
in_record = true
|
33
|
+
end
|
34
|
+
|
35
|
+
description = line.sub(/^database: /, '') if line.start_with?('database:') and in_record
|
36
|
+
uri = line.sub(/^url_syntax: /, '').sub(/\[example_id\]$/, '') if line.start_with?('url_syntax:') and in_record
|
37
|
+
}
|
28
38
|
|
29
|
-
|
30
|
-
id = line.sub(/^abbreviation: /, '').gsub(/[-\/]/, '_')
|
31
|
-
in_record = true
|
32
|
-
end
|
33
|
-
|
34
|
-
description = line.sub(/^database: /, '') if line.start_with?('database:') and in_record
|
35
|
-
uri = line.sub(/^url_syntax: /, '').sub(/\[example_id\]$/, '') if line.start_with?('url_syntax:') and in_record
|
36
|
-
}
|
39
|
+
cls << record(id, description, uri) if uri
|
37
40
|
|
38
|
-
|
41
|
+
cls << "end\n"
|
39
42
|
|
40
|
-
|
43
|
+
cls
|
44
|
+
end
|
45
|
+
|
46
|
+
unless @loaded_externally then
|
47
|
+
puts goxrefify(STDIN)
|
48
|
+
end
|
41
49
|
|
data/generators/javaify.rb
CHANGED
@@ -1,131 +1,150 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
comment = nil
|
3
|
+
def javaify(rubycode, namespace = nil)
|
4
|
+
private_scope = false
|
5
|
+
java_class = nil
|
6
|
+
comment = nil
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
end
|
8
|
+
namespace_wrapper_generated = false
|
9
|
+
class_closed = false
|
11
10
|
|
12
|
-
cls =
|
11
|
+
cls = <<EOS
|
12
|
+
package org.biointerchange.vocabulary;
|
13
|
+
import java.util.Arrays;
|
14
|
+
import java.util.Map;
|
15
|
+
import java.util.HashMap;
|
16
|
+
import java.util.HashSet;
|
17
|
+
import java.util.Set;
|
18
|
+
import com.hp.hpl.jena.rdf.model.Property;
|
19
|
+
import com.hp.hpl.jena.rdf.model.Resource;
|
20
|
+
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
21
|
+
import org.apache.commons.collections.CollectionUtils;
|
22
|
+
import org.apache.commons.collections.Predicate;
|
13
23
|
|
14
|
-
|
15
|
-
puts ''
|
16
|
-
puts 'import java.util.Arrays;'
|
17
|
-
puts 'import java.util.Map;'
|
18
|
-
puts 'import java.util.HashMap;'
|
19
|
-
puts 'import java.util.HashSet;'
|
20
|
-
puts 'import java.util.Set;'
|
21
|
-
puts 'import com.hp.hpl.jena.rdf.model.Property;'
|
22
|
-
puts 'import com.hp.hpl.jena.rdf.model.Resource;'
|
23
|
-
puts 'import com.hp.hpl.jena.rdf.model.ResourceFactory;'
|
24
|
-
puts 'import org.apache.commons.collections.CollectionUtils;'
|
25
|
-
puts 'import org.apache.commons.collections.Predicate;'
|
26
|
-
puts ''
|
24
|
+
EOS
|
27
25
|
|
28
|
-
|
29
|
-
|
26
|
+
rubycode.each { |line|
|
27
|
+
line.chomp!
|
30
28
|
|
31
|
-
|
32
|
-
if line.match(/http:\/\/[^'")]+#[^'")]+/) then
|
33
|
-
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/(#).*$/, '\1') unless namespace
|
34
|
-
else
|
35
|
-
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/\/[^\/]+$/, '/') unless namespace
|
36
|
-
end
|
37
|
-
if line.match("#{namespace}\w+") then
|
38
|
-
line.gsub!(namespace, '') unless line.strip.start_with?('#')
|
39
|
-
end
|
40
|
-
end
|
29
|
+
next if line.start_with?('module ') or line.start_with?('require ')
|
41
30
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
elsif line.strip.start_with?('def ') then
|
46
|
-
leading_spaces = line.gsub(/[^ ].*$/, '')
|
47
|
-
transduction = ''
|
48
|
-
if comment then
|
49
|
-
transduction << "#{("/**\n *" + comment).gsub(/^/, "#{leading_spaces}")}\n#{leading_spaces} */\n"
|
50
|
-
comment = nil
|
51
|
-
end
|
52
|
-
transduction << " public static #{line.sub('?', '').sub(/self\./, '').sub(/ *def\ /, '')}"
|
53
|
-
method_name = transduction.sub(/^.*public static /m, '').sub(/(\(.*)?$/, '')
|
54
|
-
transduction.sub!("public static #{method_name}", "public static _#{method_name}_") if method_name.match(/^(true|false|class|public|private|static|return|if|while|do|clone|equals|toString|hashCode|byte|char|short|int|long|float|double|boolean)$/)
|
55
|
-
variables = transduction.scan(/^\s*public static \w+\((.+)\)$/)
|
56
|
-
variables = variables[0][0].split(',').map { |variable| variable.strip } if variables.length > 0
|
57
|
-
if method_name == 'is_object_property' then
|
58
|
-
transduction.sub!(/public static .*$/, "public static boolean isObjectProperty(Resource #{variables[0]})")
|
59
|
-
elsif method_name == 'is_datatype_property' then
|
60
|
-
transduction.sub!(/public static .*$/, "public static boolean isDatatypeProperty(Resource #{variables[0]})")
|
61
|
-
elsif method_name == 'is_class' then
|
62
|
-
transduction.sub!(/public static .*$/, "public static boolean isClass(Resource #{variables[0]})")
|
63
|
-
elsif method_name == 'is_named_individual' then
|
64
|
-
transduction.sub!(/public static .*$/, "public static boolean isNamedIndividual(Resource #{variables[0]})")
|
65
|
-
elsif method_name == 'with_parent' then
|
66
|
-
transduction.sub!(/public static .*$/, "public static Set<Resource> withParent(Set<Resource> #{variables[0]}, final Resource #{variables[1]})")
|
67
|
-
elsif method_name == 'has_parent' then
|
68
|
-
transduction.sub!(/public static .*$/, "public static boolean hasParent(Resource #{variables[0]}, Resource #{variables[1]})")
|
69
|
-
else
|
70
|
-
if transduction.match(/\/\*\*[^E]*Either:.*Or:/m) or transduction.match(/\/\*\*[^A]*Ambiguous label\./) then
|
71
|
-
transduction.sub!(/public static /, 'public static Set<Resource> ')
|
31
|
+
if line.match('http://') then
|
32
|
+
if line.match(/http:\/\/[^'")]+#[^'")]+/) then
|
33
|
+
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/(#).*$/, '\1') unless namespace
|
72
34
|
else
|
73
|
-
|
35
|
+
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/\/[^\/]+$/, '/') unless namespace
|
36
|
+
end
|
37
|
+
if line.match("#{namespace}\w+") then
|
38
|
+
line.gsub!(namespace, '') unless line.strip.start_with?('#')
|
74
39
|
end
|
75
40
|
end
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
41
|
+
|
42
|
+
if line.start_with?('class') then
|
43
|
+
java_class = line.sub(/^.* /, '')
|
44
|
+
transduction = "public class #{java_class} {"
|
45
|
+
elsif line.strip.start_with?('def ') then
|
46
|
+
leading_spaces = line.gsub(/[^ ].*$/, '')
|
47
|
+
transduction = ''
|
48
|
+
if comment then
|
49
|
+
transduction << "#{("/**\n *" + comment).gsub(/^/, "#{leading_spaces}")}\n#{leading_spaces} */\n"
|
50
|
+
comment = nil
|
51
|
+
end
|
52
|
+
transduction << " public static #{line.sub('?', '').sub(/self\./, '').sub(/ *def\ /, '')}"
|
53
|
+
method_name = transduction.sub(/^.*public static /m, '').sub(/(\(.*)?$/, '')
|
54
|
+
transduction.sub!("public static #{method_name}", "public static _#{method_name}_") if method_name.match(/^(true|false|class|public|private|static|return|if|while|do|clone|equals|toString|hashCode|byte|char|short|int|long|float|double|boolean)$/)
|
55
|
+
variables = transduction.scan(/^\s*public static \w+\((.+)\)$/)
|
56
|
+
variables = variables[0][0].split(',').map { |variable| variable.strip } if variables.length > 0
|
57
|
+
if method_name == 'is_object_property' then
|
58
|
+
transduction.sub!(/public static .*$/, "public static boolean isObjectProperty(Resource #{variables[0]})")
|
59
|
+
elsif method_name == 'is_datatype_property' then
|
60
|
+
transduction.sub!(/public static .*$/, "public static boolean isDatatypeProperty(Resource #{variables[0]})")
|
61
|
+
elsif method_name == 'is_class' then
|
62
|
+
transduction.sub!(/public static .*$/, "public static boolean isClass(Resource #{variables[0]})")
|
63
|
+
elsif method_name == 'is_named_individual' then
|
64
|
+
transduction.sub!(/public static .*$/, "public static boolean isNamedIndividual(Resource #{variables[0]})")
|
65
|
+
elsif method_name == 'with_parent' then
|
66
|
+
transduction.sub!(/public static .*$/, "public static Set<Resource> withParent(Set<Resource> #{variables[0]}, final Resource #{variables[1]})")
|
67
|
+
elsif method_name == 'has_parent' then
|
68
|
+
transduction.sub!(/public static .*$/, "public static boolean hasParent(Resource #{variables[0]}, Resource #{variables[1]})")
|
69
|
+
else
|
70
|
+
if transduction.match(/\/\*\*[^E]*Either:.*Or:/m) or transduction.match(/\/\*\*[^A]*Ambiguous label\./) then
|
71
|
+
transduction.sub!(/public static /, 'public static Set<Resource> ')
|
72
|
+
else
|
73
|
+
transduction.sub!(/public static /, 'public static Resource ')
|
74
|
+
end
|
75
|
+
end
|
76
|
+
transduction << '()' unless transduction.end_with?(")")
|
77
|
+
transduction << ' {'
|
78
|
+
elsif line.strip.start_with?('#') then
|
79
|
+
unless comment then
|
80
|
+
comment = line.strip.sub(/^# ?/, ' ')
|
81
|
+
else
|
82
|
+
comment << "\n#{line.strip.gsub(/^# ?/, ' * ').gsub(/\+([^+]+)\+::/, '@param \1')}"
|
83
|
+
end
|
84
|
+
transduction = nil
|
85
|
+
elsif line.strip.start_with?('end') then
|
86
|
+
if class_closed then
|
87
|
+
transduction = nil
|
88
|
+
else
|
89
|
+
if line.start_with?('end') then
|
90
|
+
class_closed = true
|
91
|
+
cls.gsub!(/_namespace_#{java_class}\(/, 'return ResourceFactory.createResource(') unless namespace_wrapper_generated
|
92
|
+
end
|
93
|
+
transduction = line.sub(/end/, '}')
|
94
|
+
end
|
95
|
+
elsif line.strip.start_with?('if ') or line.strip.start_with?('elsif') then
|
96
|
+
transduction = "#{line.sub(/ then$/, '').sub('elsif', 'else if').gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w)\?\(/, '\1(')}".gsub(/\.has_key\(([^)]+)\)/, '.containsKey(\1)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
97
|
+
if transduction.match(/if ([^=]+|_namespace_[^=]+) ?== ?([^_].*|_namespace_.*)/)
|
98
|
+
transduction.sub!(/if ([^= ]+) ?== ?(\S+)\s+$/, 'if \1.equals(\2)')
|
99
|
+
transduction.sub!(/_namespace_\w+\('(\w+)'\)/, "\"#{namespace}\\1\"")
|
100
|
+
end
|
101
|
+
transduction.sub!(/\.equals\(("[^"]+")\)/, '.equals(ResourceFactory.createResource(\1))')
|
102
|
+
transduction.sub!(/if /, 'if (')
|
103
|
+
transduction.sub!(/$/, ') {')
|
104
|
+
elsif line.strip.start_with?('return [') then
|
105
|
+
transduction = line.sub(/return \[/, 'return new HashSet<Resource>(Arrays.asList(new Resource[] {').sub(/\]$/, '}));').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)")
|
106
|
+
elsif line.strip.start_with?('private') then
|
107
|
+
private_scope = true
|
108
|
+
namespace_wrapper_generated = true
|
109
|
+
transduction = " private static Resource _namespace_#{java_class}(String accession) {\n"
|
110
|
+
transduction << " if (isClass(ResourceFactory.createResource(\"#{namespace}\" + accession))) {\n"
|
111
|
+
transduction << " return ResourceFactory.createResource(\"#{namespace}\" + accession);\n"
|
112
|
+
transduction << " } else {\n"
|
113
|
+
transduction << " return ResourceFactory.createProperty(\"#{namespace}\" + accession);\n"
|
114
|
+
transduction << " }\n"
|
115
|
+
transduction << " }\n\n"
|
81
116
|
else
|
82
|
-
|
117
|
+
transduction = line.gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w+)\.select ?\{ ?\|(\w+)\| ?(.*[^ ]) ?\}/, 'new HashSet<Resource>(CollectionUtils.select(\1, new Predicate() { public boolean evaluate(Object \2) { return \3; } }))').gsub(/(\w)\?\(/, '\1(').gsub('has_parent(', 'hasParent((Resource)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
118
|
+
transduction.gsub!('__', '__') unless line.strip.start_with?('@@')
|
119
|
+
transduction << ';' unless line.strip.empty?
|
83
120
|
end
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
transduction
|
91
|
-
transduction
|
121
|
+
|
122
|
+
if transduction and transduction.strip.start_with?('__') then
|
123
|
+
variable = transduction.scan(/__\w+/)[0]
|
124
|
+
map = {}
|
125
|
+
map = Hash[*transduction.sub(/^.* \{/, '').sub(/\};/, '').split(',').map { |assignment| assignment.split('=>').map { |kv| kv.strip } }.flatten.map { |function| if function.match(/'\w+'/) then "#{namespace}#{function.scan(/'\w+'/)[0].gsub(/'/, '')}" else nil end }.compact ] if transduction.match(/\{\s*\S+.*\}/)
|
126
|
+
transduction = " private static Map<Resource, Resource> #{variable} = _init_#{variable}();\n\n"
|
127
|
+
transduction << " private static Map<Resource, Resource> _init_#{variable}() {\n"
|
128
|
+
transduction << " Map<Resource, Resource> map = new HashMap<Resource, Resource>();\n\n"
|
129
|
+
map.each_pair { |key, value| transduction << " map.put(ResourceFactory.createResource(\"#{key}\"), ResourceFactory.createResource(\"#{value}\"));\n" }
|
130
|
+
transduction << "\n"
|
131
|
+
transduction << " return map;\n"
|
132
|
+
transduction << " }\n"
|
92
133
|
end
|
93
|
-
transduction.sub!(/\.equals\(("[^"]+")\)/, '.equals(ResourceFactory.createResource(\1))')
|
94
|
-
transduction.sub!(/if /, 'if (')
|
95
|
-
transduction.sub!(/$/, ') {')
|
96
|
-
elsif line.strip.start_with?('return [') then
|
97
|
-
transduction = line.sub(/return \[/, 'return new HashSet<Resource>(Arrays.asList(new Resource[] {').sub(/\]$/, '}));').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)")
|
98
|
-
elsif line.strip.start_with?('private') then
|
99
|
-
private_scope = true
|
100
|
-
transduction = " private static Resource _namespace_#{java_class}(String accession) {\n"
|
101
|
-
transduction << " if (isClass(ResourceFactory.createResource(\"#{namespace}\" + accession))) {\n"
|
102
|
-
transduction << " return ResourceFactory.createResource(\"#{namespace}\" + accession);\n"
|
103
|
-
transduction << " } else {\n"
|
104
|
-
transduction << " return ResourceFactory.createProperty(\"#{namespace}\" + accession);\n"
|
105
|
-
transduction << " }\n"
|
106
|
-
transduction << " }\n\n"
|
107
|
-
else
|
108
|
-
transduction = line.gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w+)\.select ?\{ ?\|(\w+)\| ?(.*[^ ]) ?\}/, 'new HashSet<Resource>(CollectionUtils.select(\1, new Predicate() { public boolean evaluate(Object \2) { return \3; } }))').gsub(/(\w)\?\(/, '\1(').gsub('has_parent(', 'hasParent((Resource)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
109
|
-
transduction.gsub!('__', '__') unless line.strip.start_with?('@@')
|
110
|
-
transduction << ';' unless line.strip.empty?
|
111
|
-
end
|
112
134
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
transduction << " private static Map<Resource, Resource> _init_#{variable}() {\n"
|
119
|
-
transduction << " Map<Resource, Resource> map = new HashMap<Resource, Resource>();\n\n"
|
120
|
-
map.each_pair { |key, value| transduction << " map.put(ResourceFactory.createResource(\"#{key}\"), ResourceFactory.createResource(\"#{value}\"));\n" }
|
121
|
-
transduction << "\n"
|
122
|
-
transduction << " return map;\n"
|
123
|
-
transduction << " }\n"
|
124
|
-
end
|
135
|
+
cls << "#{transduction}\n".gsub(/'/, '"') if transduction
|
136
|
+
}
|
137
|
+
|
138
|
+
cls
|
139
|
+
end
|
125
140
|
|
126
|
-
|
141
|
+
unless @loaded_externally then
|
142
|
+
namespace = nil
|
127
143
|
|
128
|
-
|
144
|
+
if ARGV.length == 1 then
|
145
|
+
namespace = ARGV[0]
|
146
|
+
end
|
129
147
|
|
130
|
-
puts
|
148
|
+
puts javaify(STDIN, namespace)
|
149
|
+
end
|
131
150
|
|