biointerchange 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +61 -18
- data/VERSION +1 -1
- data/examples/Saccharomyces_cerevisiae_incl_consequences.gvf.gz +0 -0
- data/examples/webservice_example.json +7 -0
- data/generators/GOxrefify.rb +36 -28
- data/generators/javaify.rb +131 -112
- data/generators/make_supplement_releases.rb +57 -0
- data/generators/pythonify.rb +68 -53
- data/lib/biointerchange/core.rb +4 -2
- data/lib/biointerchange/faldo.rb +160 -0
- data/lib/biointerchange/genomics/gff3_feature_set.rb +1 -1
- data/lib/biointerchange/genomics/gff3_rdf_ntriples.rb +1 -1
- data/lib/biointerchange/genomics/gff3_reader.rb +1 -1
- data/lib/biointerchange/model.rb +21 -0
- data/lib/biointerchange/registry.rb +1 -1
- data/lib/biointerchange/sio.rb +2035 -57
- data/lib/biointerchange/textmining/document.rb +1 -1
- data/lib/biointerchange/textmining/pdfx_xml_reader.rb +1 -15
- data/lib/biointerchange/textmining/pubannos_json_reader.rb +1 -3
- data/spec/gff3_rdfwriter_spec.rb +1 -0
- data/spec/gvf_rdfwriter_spec.rb +1 -0
- data/spec/text_mining_pdfx_xml_reader_spec.rb +4 -3
- data/spec/text_mining_pubannos_json_reader_spec.rb +6 -5
- data/spec/text_mining_rdfwriter_spec.rb +2 -1
- data/supplemental/java/biointerchange/pom.xml +1 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/FALDO.java +219 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GFF3O.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GOXRef.java +1221 -0
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java +2283 -15
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java +2 -1
- data/supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java +5 -4
- data/supplemental/python/biointerchange/faldo.py +168 -0
- data/supplemental/python/biointerchange/gff3o.py +6 -4
- data/supplemental/python/biointerchange/goxref.py +1040 -0
- data/supplemental/python/biointerchange/gvf1o.py +6 -4
- data/supplemental/python/biointerchange/sio.py +1740 -21
- data/supplemental/python/biointerchange/so.py +6527 -6525
- data/supplemental/python/biointerchange/sofa.py +792 -790
- data/supplemental/python/setup.py +2 -2
- data/web/about.html +9 -29
- data/web/api.html +10 -30
- data/web/biointerchange.js +78 -27
- data/web/cli.html +137 -0
- data/web/index.html +19 -34
- data/web/ontologies.html +9 -29
- data/web/service/rdfizer.fcgi +19 -2
- data/web/webservices.html +70 -35
- metadata +13 -3
data/README.md
CHANGED
@@ -7,6 +7,8 @@ BioInterchange is a tool for generating interchangable RDF data from non-RDF dat
|
|
7
7
|
|
8
8
|
Supported input file formats (see examples directory):
|
9
9
|
|
10
|
+
* [GFF3](http://www.sequenceontology.org/resources/gff3.html)
|
11
|
+
* [GVF](http://www.sequenceontology.org/resources/gvf.html)
|
10
12
|
* [Pubannos JSON](http://pubannotation.dbcls.jp/)
|
11
13
|
* [PDFx XML](http://pdfx.cs.man.ac.uk/)
|
12
14
|
|
@@ -19,6 +21,7 @@ Ontologies used in the RDF output:
|
|
19
21
|
* [Generic Feature Format Version 3 Ontology](http://www.biointerchange.org/ontologies.html) (GFF3O)
|
20
22
|
* [Genome Variation Format Version 1 Ontology](http://www.biointerchange.org/ontologies.html) (GVF1O)
|
21
23
|
* [Semanticscience Integrated Ontology](http://code.google.com/p/semanticscience/wiki/SIO) (SIO)
|
24
|
+
* [Sequence Ontology](http://www.sequenceontology.org/index.html) (SO)
|
22
25
|
* [Sequence Ontology Feature Annotation](http://www.sequenceontology.org/index.html) (SOFA)
|
23
26
|
|
24
27
|
#### Contributing
|
@@ -96,7 +99,8 @@ The following list provides information on the origin of the example-data files
|
|
96
99
|
* `BovineGenomeChrX.gff3.gz`: Gzipped GFF3 file describing a Bos taurus chromosome X. Downloaded from [http://bovinegenome.org/?q=download_chromosome_gff3](http://bovinegenome.org/?q=download_chromosome_gff3)
|
97
100
|
* `chromosome_BF.gff`: GFF3 file of floating contigs from the Baylor Sequencing Centre. Downloaded from [http://dictybase.org/Downloads](http://dictybase.org/Downloads)
|
98
101
|
* `estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf`: GVF file of EBI's [DGVa](http://www.ebi.ac.uk/dgva/database-genomic-variants-archive). Downloaded from [ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf](ftp://ftp.ebi.ac.uk/pub/databases/dgva/estd176_Banerjee_et_al_2011/gvf/estd176_Banerjee_et_al_2011.2012-11-29.NCBI36.gvf)
|
99
|
-
* `gb-2007-8-3-R40.xml`: Generated by [
|
102
|
+
* `gb-2007-8-3-R40.xml`: Generated by [PDFx](http://pdfx.cs.man.ac.uk) from open-access source PDF [Sense-antisense pairs in mammals: functional and evolutionary considerations](http://genomebiology.com/content/pdf/gb-2007-8-3-r40.pdf)
|
103
|
+
* `Saccharomyces_cerevisiae_incl_consequences.gvf.gz`: Downloaded from [ftp://ftp.ensembl.org/pub/release-71/variation/gvf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae_incl_consequences.gvf.gz](ftp://ftp.ensembl.org/pub/release-71/variation/gvf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae_incl_consequences.gvf.gz)
|
100
104
|
|
101
105
|
### Application Programming Interface
|
102
106
|
|
@@ -118,7 +122,7 @@ BioInterchange available.
|
|
118
122
|
To install the BioInterchange egg, run:
|
119
123
|
|
120
124
|
sudo easy_install rdflib
|
121
|
-
sudo easy_install http://www.biointerchange.org/eggs/biointerchange-0.
|
125
|
+
sudo easy_install http://www.biointerchange.org/eggs/biointerchange-0.2.2-py2.7.egg
|
122
126
|
|
123
127
|
Usage examples:
|
124
128
|
|
@@ -159,7 +163,7 @@ To use the BioInterchange artifact, set-up add the following to your Maven POM f
|
|
159
163
|
<dependency>
|
160
164
|
<groupId>org.biointerchange</groupId>
|
161
165
|
<artifactId>vocabularies</artifactId>
|
162
|
-
<version>0.
|
166
|
+
<version>0.2.2</version>
|
163
167
|
</dependency>
|
164
168
|
</dependencies>
|
165
169
|
|
@@ -231,11 +235,55 @@ Usage examples of accessing GFF3O's vocabulary:
|
|
231
235
|
|
232
236
|
### RESTful Web-Service
|
233
237
|
|
234
|
-
|
238
|
+
A RESTful web-service is available via the URI: [http://www.biointerchange.org/service/rdfizer.biocgi](http://www.biointerchange.org/service/rdfizer.biocgi)
|
239
|
+
|
240
|
+
RDFization parameters and data are sent as a single HTTP POST request containing a JSON object. The JSON object has to be formatted as follows:
|
241
|
+
|
242
|
+
{
|
243
|
+
"parameters" : {
|
244
|
+
"input" : "INPUT_FORMAT",
|
245
|
+
"output": "OUTPUT_METHOD"
|
246
|
+
},
|
247
|
+
"data" : "URL_ENCODED_DATA"
|
248
|
+
}
|
249
|
+
|
250
|
+
* `INPUT_FORMAT`: determines the input data type; available input formats are
|
251
|
+
* `biointerchange.gff3`: [Generic Feature Format Version 3](http://www.sequenceontology.org/resources/gff3.html)
|
252
|
+
* `biointerchange.gvf`: [Genome Variation Format](http://www.sequenceontology.org/resources/gvf.html)
|
253
|
+
* `dbcls.catanns.json`: [PubAnnotation categorical annotations](http://pubannotation.dbcls.jp) JSON
|
254
|
+
* `uk.ac.man.pdfx`: [PDFx](http://pdfx.cs.man.ac.uk) XML
|
255
|
+
* `OUTPUT_METHOD`: determines the RDFization method that should be used (the output will always be RDF N-Triples); available output methods are
|
256
|
+
* `rdf.biointerchange.gff3`: RDFization of `biointerchange.gff3`
|
257
|
+
* `rdf.biointerchange.gvf`: RDFization of `biointerchange.gvf`
|
258
|
+
* `rdf.bh12.sio`: RDFization of `dbcls.catanns.json` or `uk.ac.man.pdfx`
|
259
|
+
* `URL_ENCODED_DATA`: data for RDFization as [URL encoded](http://en.wikipedia.org/wiki/Percent-encoding) string
|
260
|
+
|
261
|
+
#### Example
|
262
|
+
|
263
|
+
A query example is part of BioInterchange's source repository. The file [webservice_example.json](https://raw.github.com/BioInterchange/BioInterchange/master/examples/webservice_example.json) contains the following query:
|
264
|
+
|
265
|
+
{
|
266
|
+
"parameters" : {
|
267
|
+
"input" : "biointerchange.gff3",
|
268
|
+
"output": "rdf.biointerchange.gff3"
|
269
|
+
},
|
270
|
+
"data" : "ChrX.38%09bovine_complete_cds_gmap_perfect%09gene%0915870%0916254%09.%09+%09.%09ID%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09mRNA%0915870%0916254%09.%09+%09.%09ID%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%3BParent%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09CDS%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09exon%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0A"
|
271
|
+
}
|
272
|
+
|
273
|
+
The query can be run using the popular [cURL](http://en.wikipedia.org/wiki/CURL) tool:
|
274
|
+
|
275
|
+
curl -d '@webservice_example.json' http://www.biointerchange.org/service/rdfizer.biocgi
|
235
276
|
|
236
277
|
### Interactive Web-Site
|
237
278
|
|
238
|
-
|
279
|
+
BioInterchange has an [interactive web-interface](http://www.biointerchange.org/webservices.html) for RDFizing small amounts of data. Each input format and RDF serialization method pair comes with an example, which can be used as guidance or as a test bed for learning how to use BioInterchange.
|
280
|
+
|
281
|
+
#### Usage Instructions
|
282
|
+
|
283
|
+
1. select a data input format (for example, GFF3)
|
284
|
+
2. select an RDF serialization method/output format (for example, "RDF using GFF3O ontology")
|
285
|
+
3. paste RDF serialization method parameters and data in the text fields (or, click "Paste Input-Specific Example")
|
286
|
+
4. click "Generate RDF" and the RDFized data will appear below
|
239
287
|
|
240
288
|
Build Notes
|
241
289
|
-----------
|
@@ -262,10 +310,15 @@ The last step, `bundle`, will install gem dependencies of BioInterchange automat
|
|
262
310
|
|
263
311
|
### Building Vocabulary Classes
|
264
312
|
|
265
|
-
Building a new version of the Ruby vocabulary classes for
|
313
|
+
Building a new version of the Ruby vocabulary classes for FALDO, GFF3O, GVF1O, SIO, and SOFA (requires that the OBO files are saved as RDF/XML using [Protege](http://protege.stanford.edu); Apache [Jena](http://jena.apache.org)'s `rdfcat` tool is required to reformat RDF Turtle as RDF/XML):
|
266
314
|
|
267
315
|
sudo gem install rdf
|
268
316
|
sudo gem install rdf-rdfxml
|
317
|
+
echo -e "require 'rdf'\nmodule BioInterchange\n" > lib/biointerchange/faldo.rb
|
318
|
+
rdfcat -ttl <path-to-turtle-version-of-faldo> > faldo.xml.tmp
|
319
|
+
ruby generators/rdfxml.rb faldo.xml.tmp FALDO >> lib/biointerchange/faldo.rb
|
320
|
+
rm -f faldo.xml.tmp
|
321
|
+
echo -e "\nend" >> lib/biointerchange/faldo.rb
|
269
322
|
echo -e "require 'rdf'\nmodule BioInterchange\n" > lib/biointerchange/gff3o.rb
|
270
323
|
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gff3o> GFF3O >> lib/biointerchange/gff3o.rb
|
271
324
|
echo -e "\nend" >> lib/biointerchange/gff3o.rb
|
@@ -292,12 +345,7 @@ A Geno Ontology external reference (GOxref) vocabulary can be created by directl
|
|
292
345
|
|
293
346
|
The source-code generation can be skipped, if none of the ontologies that are used by BioInterchange have been changed. Otherwise, the existing Python vocabulary class wrappers can be generated as follows:
|
294
347
|
|
295
|
-
ruby generators/
|
296
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/pythonify.rb > supplemental/python/biointerchange/gvf1o.py
|
297
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/pythonify.rb > supplemental/python/biointerchange/sio.py
|
298
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-so> SO | ruby generators/pythonify.rb > supplemental/python/biointerchange/so.py
|
299
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/pythonify.rb > supplemental/python/biointerchange/sofa.py
|
300
|
-
curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/pythonify.rb > supplemental/python/biointerchange/goxref.py
|
348
|
+
ruby generators/make_supplement_releases.rb
|
301
349
|
|
302
350
|
Generate the BioInterchange Python vocabulary egg:
|
303
351
|
|
@@ -314,12 +362,7 @@ The vocabulary wrapper makes used of RDFLib, which does not install automaticall
|
|
314
362
|
|
315
363
|
The source-code generation can be skipped, if none of the ontologies that are used by BioInterchange have been changed. Otherwise, the existing Java vocabulary class wrappers can be generated as follows:
|
316
364
|
|
317
|
-
ruby generators/
|
318
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-gvf1o> GVF1O | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GVF1O.java
|
319
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sio> SIO | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SIO.java
|
320
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-so> SO | ruby generators/javaify.rb "http://purl.obolibrary.org/obo/" > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SO.java
|
321
|
-
ruby generators/rdfxml.rb <path-to-rdf/xml-version-of-sofa> SOFA | ruby generators/javaify.rb "http://purl.obolibrary.org/obo/" > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/SOFA.java
|
322
|
-
curl ftp://ftp.geneontology.org/pub/go/doc/GO.xrf_abbs | ruby generators/GOxrefify.rb | ruby generators/javaify.rb > supplemental/java/biointerchange/src/main/java/org/biointerchange/vocabulary/GOXRef.java
|
365
|
+
ruby generators/make_supplement_releases.rb
|
323
366
|
|
324
367
|
Generate the BioInterchange Java vocabulary artifact:
|
325
368
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.1
|
1
|
+
0.2.2
|
Binary file
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"parameters" : {
|
3
|
+
"input" : "biointerchange.gff3",
|
4
|
+
"output": "rdf.biointerchange.gff3"
|
5
|
+
},
|
6
|
+
"data" : "ChrX.38%09bovine_complete_cds_gmap_perfect%09gene%0915870%0916254%09.%09+%09.%09ID%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09mRNA%0915870%0916254%09.%09+%09.%09ID%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%3BParent%3DBC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09CDS%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0AChrX.38%09bovine_complete_cds_gmap_perfect%09exon%0915870%0916254%09.%09+%090%09Parent%3Dbovine_complete_cds_gmap_perfect_BC109609_ChrX.38%0A"
|
7
|
+
}
|
data/generators/GOxrefify.rb
CHANGED
@@ -1,41 +1,49 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
3
|
def record(id, description, uri)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
cls = " # Returns the link-out URI for objects of \"#{description}\".\n"
|
5
|
+
cls << " def self.#{id}\n"
|
6
|
+
cls << " RDF::URI.new(\"#{uri}\")\n"
|
7
|
+
cls << " end\n\n"
|
8
|
+
|
9
|
+
cls
|
9
10
|
end
|
10
11
|
|
11
|
-
|
12
|
-
|
12
|
+
def goxrefify(ontology)
|
13
|
+
cls = "class GOXRef\n\n"
|
14
|
+
|
15
|
+
in_record = false
|
13
16
|
|
14
|
-
|
17
|
+
id = nil
|
18
|
+
description = nil
|
19
|
+
uri = nil
|
15
20
|
|
16
|
-
|
17
|
-
|
18
|
-
uri = nil
|
21
|
+
ontology.each { |line|
|
22
|
+
line.chomp!
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
if line.empty? then
|
25
|
+
record(id, description, uri) if uri and not uri.match(/\[.*\]/)
|
26
|
+
uri = nil
|
27
|
+
in_record = false
|
28
|
+
end
|
22
29
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
30
|
+
if line.start_with?('abbreviation:') and not in_record then
|
31
|
+
id = line.sub(/^abbreviation: /, '').gsub(/[-\/]/, '_')
|
32
|
+
in_record = true
|
33
|
+
end
|
34
|
+
|
35
|
+
description = line.sub(/^database: /, '') if line.start_with?('database:') and in_record
|
36
|
+
uri = line.sub(/^url_syntax: /, '').sub(/\[example_id\]$/, '') if line.start_with?('url_syntax:') and in_record
|
37
|
+
}
|
28
38
|
|
29
|
-
|
30
|
-
id = line.sub(/^abbreviation: /, '').gsub(/[-\/]/, '_')
|
31
|
-
in_record = true
|
32
|
-
end
|
33
|
-
|
34
|
-
description = line.sub(/^database: /, '') if line.start_with?('database:') and in_record
|
35
|
-
uri = line.sub(/^url_syntax: /, '').sub(/\[example_id\]$/, '') if line.start_with?('url_syntax:') and in_record
|
36
|
-
}
|
39
|
+
cls << record(id, description, uri) if uri
|
37
40
|
|
38
|
-
|
41
|
+
cls << "end\n"
|
39
42
|
|
40
|
-
|
43
|
+
cls
|
44
|
+
end
|
45
|
+
|
46
|
+
unless @loaded_externally then
|
47
|
+
puts goxrefify(STDIN)
|
48
|
+
end
|
41
49
|
|
data/generators/javaify.rb
CHANGED
@@ -1,131 +1,150 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
comment = nil
|
3
|
+
def javaify(rubycode, namespace = nil)
|
4
|
+
private_scope = false
|
5
|
+
java_class = nil
|
6
|
+
comment = nil
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
end
|
8
|
+
namespace_wrapper_generated = false
|
9
|
+
class_closed = false
|
11
10
|
|
12
|
-
cls =
|
11
|
+
cls = <<EOS
|
12
|
+
package org.biointerchange.vocabulary;
|
13
|
+
import java.util.Arrays;
|
14
|
+
import java.util.Map;
|
15
|
+
import java.util.HashMap;
|
16
|
+
import java.util.HashSet;
|
17
|
+
import java.util.Set;
|
18
|
+
import com.hp.hpl.jena.rdf.model.Property;
|
19
|
+
import com.hp.hpl.jena.rdf.model.Resource;
|
20
|
+
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
21
|
+
import org.apache.commons.collections.CollectionUtils;
|
22
|
+
import org.apache.commons.collections.Predicate;
|
13
23
|
|
14
|
-
|
15
|
-
puts ''
|
16
|
-
puts 'import java.util.Arrays;'
|
17
|
-
puts 'import java.util.Map;'
|
18
|
-
puts 'import java.util.HashMap;'
|
19
|
-
puts 'import java.util.HashSet;'
|
20
|
-
puts 'import java.util.Set;'
|
21
|
-
puts 'import com.hp.hpl.jena.rdf.model.Property;'
|
22
|
-
puts 'import com.hp.hpl.jena.rdf.model.Resource;'
|
23
|
-
puts 'import com.hp.hpl.jena.rdf.model.ResourceFactory;'
|
24
|
-
puts 'import org.apache.commons.collections.CollectionUtils;'
|
25
|
-
puts 'import org.apache.commons.collections.Predicate;'
|
26
|
-
puts ''
|
24
|
+
EOS
|
27
25
|
|
28
|
-
|
29
|
-
|
26
|
+
rubycode.each { |line|
|
27
|
+
line.chomp!
|
30
28
|
|
31
|
-
|
32
|
-
if line.match(/http:\/\/[^'")]+#[^'")]+/) then
|
33
|
-
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/(#).*$/, '\1') unless namespace
|
34
|
-
else
|
35
|
-
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/\/[^\/]+$/, '/') unless namespace
|
36
|
-
end
|
37
|
-
if line.match("#{namespace}\w+") then
|
38
|
-
line.gsub!(namespace, '') unless line.strip.start_with?('#')
|
39
|
-
end
|
40
|
-
end
|
29
|
+
next if line.start_with?('module ') or line.start_with?('require ')
|
41
30
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
elsif line.strip.start_with?('def ') then
|
46
|
-
leading_spaces = line.gsub(/[^ ].*$/, '')
|
47
|
-
transduction = ''
|
48
|
-
if comment then
|
49
|
-
transduction << "#{("/**\n *" + comment).gsub(/^/, "#{leading_spaces}")}\n#{leading_spaces} */\n"
|
50
|
-
comment = nil
|
51
|
-
end
|
52
|
-
transduction << " public static #{line.sub('?', '').sub(/self\./, '').sub(/ *def\ /, '')}"
|
53
|
-
method_name = transduction.sub(/^.*public static /m, '').sub(/(\(.*)?$/, '')
|
54
|
-
transduction.sub!("public static #{method_name}", "public static _#{method_name}_") if method_name.match(/^(true|false|class|public|private|static|return|if|while|do|clone|equals|toString|hashCode|byte|char|short|int|long|float|double|boolean)$/)
|
55
|
-
variables = transduction.scan(/^\s*public static \w+\((.+)\)$/)
|
56
|
-
variables = variables[0][0].split(',').map { |variable| variable.strip } if variables.length > 0
|
57
|
-
if method_name == 'is_object_property' then
|
58
|
-
transduction.sub!(/public static .*$/, "public static boolean isObjectProperty(Resource #{variables[0]})")
|
59
|
-
elsif method_name == 'is_datatype_property' then
|
60
|
-
transduction.sub!(/public static .*$/, "public static boolean isDatatypeProperty(Resource #{variables[0]})")
|
61
|
-
elsif method_name == 'is_class' then
|
62
|
-
transduction.sub!(/public static .*$/, "public static boolean isClass(Resource #{variables[0]})")
|
63
|
-
elsif method_name == 'is_named_individual' then
|
64
|
-
transduction.sub!(/public static .*$/, "public static boolean isNamedIndividual(Resource #{variables[0]})")
|
65
|
-
elsif method_name == 'with_parent' then
|
66
|
-
transduction.sub!(/public static .*$/, "public static Set<Resource> withParent(Set<Resource> #{variables[0]}, final Resource #{variables[1]})")
|
67
|
-
elsif method_name == 'has_parent' then
|
68
|
-
transduction.sub!(/public static .*$/, "public static boolean hasParent(Resource #{variables[0]}, Resource #{variables[1]})")
|
69
|
-
else
|
70
|
-
if transduction.match(/\/\*\*[^E]*Either:.*Or:/m) or transduction.match(/\/\*\*[^A]*Ambiguous label\./) then
|
71
|
-
transduction.sub!(/public static /, 'public static Set<Resource> ')
|
31
|
+
if line.match('http://') then
|
32
|
+
if line.match(/http:\/\/[^'")]+#[^'")]+/) then
|
33
|
+
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/(#).*$/, '\1') unless namespace
|
72
34
|
else
|
73
|
-
|
35
|
+
namespace = line.scan(/http:\/\/[^'")]+/)[0].sub(/\/[^\/]+$/, '/') unless namespace
|
36
|
+
end
|
37
|
+
if line.match("#{namespace}\w+") then
|
38
|
+
line.gsub!(namespace, '') unless line.strip.start_with?('#')
|
74
39
|
end
|
75
40
|
end
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
41
|
+
|
42
|
+
if line.start_with?('class') then
|
43
|
+
java_class = line.sub(/^.* /, '')
|
44
|
+
transduction = "public class #{java_class} {"
|
45
|
+
elsif line.strip.start_with?('def ') then
|
46
|
+
leading_spaces = line.gsub(/[^ ].*$/, '')
|
47
|
+
transduction = ''
|
48
|
+
if comment then
|
49
|
+
transduction << "#{("/**\n *" + comment).gsub(/^/, "#{leading_spaces}")}\n#{leading_spaces} */\n"
|
50
|
+
comment = nil
|
51
|
+
end
|
52
|
+
transduction << " public static #{line.sub('?', '').sub(/self\./, '').sub(/ *def\ /, '')}"
|
53
|
+
method_name = transduction.sub(/^.*public static /m, '').sub(/(\(.*)?$/, '')
|
54
|
+
transduction.sub!("public static #{method_name}", "public static _#{method_name}_") if method_name.match(/^(true|false|class|public|private|static|return|if|while|do|clone|equals|toString|hashCode|byte|char|short|int|long|float|double|boolean)$/)
|
55
|
+
variables = transduction.scan(/^\s*public static \w+\((.+)\)$/)
|
56
|
+
variables = variables[0][0].split(',').map { |variable| variable.strip } if variables.length > 0
|
57
|
+
if method_name == 'is_object_property' then
|
58
|
+
transduction.sub!(/public static .*$/, "public static boolean isObjectProperty(Resource #{variables[0]})")
|
59
|
+
elsif method_name == 'is_datatype_property' then
|
60
|
+
transduction.sub!(/public static .*$/, "public static boolean isDatatypeProperty(Resource #{variables[0]})")
|
61
|
+
elsif method_name == 'is_class' then
|
62
|
+
transduction.sub!(/public static .*$/, "public static boolean isClass(Resource #{variables[0]})")
|
63
|
+
elsif method_name == 'is_named_individual' then
|
64
|
+
transduction.sub!(/public static .*$/, "public static boolean isNamedIndividual(Resource #{variables[0]})")
|
65
|
+
elsif method_name == 'with_parent' then
|
66
|
+
transduction.sub!(/public static .*$/, "public static Set<Resource> withParent(Set<Resource> #{variables[0]}, final Resource #{variables[1]})")
|
67
|
+
elsif method_name == 'has_parent' then
|
68
|
+
transduction.sub!(/public static .*$/, "public static boolean hasParent(Resource #{variables[0]}, Resource #{variables[1]})")
|
69
|
+
else
|
70
|
+
if transduction.match(/\/\*\*[^E]*Either:.*Or:/m) or transduction.match(/\/\*\*[^A]*Ambiguous label\./) then
|
71
|
+
transduction.sub!(/public static /, 'public static Set<Resource> ')
|
72
|
+
else
|
73
|
+
transduction.sub!(/public static /, 'public static Resource ')
|
74
|
+
end
|
75
|
+
end
|
76
|
+
transduction << '()' unless transduction.end_with?(")")
|
77
|
+
transduction << ' {'
|
78
|
+
elsif line.strip.start_with?('#') then
|
79
|
+
unless comment then
|
80
|
+
comment = line.strip.sub(/^# ?/, ' ')
|
81
|
+
else
|
82
|
+
comment << "\n#{line.strip.gsub(/^# ?/, ' * ').gsub(/\+([^+]+)\+::/, '@param \1')}"
|
83
|
+
end
|
84
|
+
transduction = nil
|
85
|
+
elsif line.strip.start_with?('end') then
|
86
|
+
if class_closed then
|
87
|
+
transduction = nil
|
88
|
+
else
|
89
|
+
if line.start_with?('end') then
|
90
|
+
class_closed = true
|
91
|
+
cls.gsub!(/_namespace_#{java_class}\(/, 'return ResourceFactory.createResource(') unless namespace_wrapper_generated
|
92
|
+
end
|
93
|
+
transduction = line.sub(/end/, '}')
|
94
|
+
end
|
95
|
+
elsif line.strip.start_with?('if ') or line.strip.start_with?('elsif') then
|
96
|
+
transduction = "#{line.sub(/ then$/, '').sub('elsif', 'else if').gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w)\?\(/, '\1(')}".gsub(/\.has_key\(([^)]+)\)/, '.containsKey(\1)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
97
|
+
if transduction.match(/if ([^=]+|_namespace_[^=]+) ?== ?([^_].*|_namespace_.*)/)
|
98
|
+
transduction.sub!(/if ([^= ]+) ?== ?(\S+)\s+$/, 'if \1.equals(\2)')
|
99
|
+
transduction.sub!(/_namespace_\w+\('(\w+)'\)/, "\"#{namespace}\\1\"")
|
100
|
+
end
|
101
|
+
transduction.sub!(/\.equals\(("[^"]+")\)/, '.equals(ResourceFactory.createResource(\1))')
|
102
|
+
transduction.sub!(/if /, 'if (')
|
103
|
+
transduction.sub!(/$/, ') {')
|
104
|
+
elsif line.strip.start_with?('return [') then
|
105
|
+
transduction = line.sub(/return \[/, 'return new HashSet<Resource>(Arrays.asList(new Resource[] {').sub(/\]$/, '}));').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)")
|
106
|
+
elsif line.strip.start_with?('private') then
|
107
|
+
private_scope = true
|
108
|
+
namespace_wrapper_generated = true
|
109
|
+
transduction = " private static Resource _namespace_#{java_class}(String accession) {\n"
|
110
|
+
transduction << " if (isClass(ResourceFactory.createResource(\"#{namespace}\" + accession))) {\n"
|
111
|
+
transduction << " return ResourceFactory.createResource(\"#{namespace}\" + accession);\n"
|
112
|
+
transduction << " } else {\n"
|
113
|
+
transduction << " return ResourceFactory.createProperty(\"#{namespace}\" + accession);\n"
|
114
|
+
transduction << " }\n"
|
115
|
+
transduction << " }\n\n"
|
81
116
|
else
|
82
|
-
|
117
|
+
transduction = line.gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w+)\.select ?\{ ?\|(\w+)\| ?(.*[^ ]) ?\}/, 'new HashSet<Resource>(CollectionUtils.select(\1, new Predicate() { public boolean evaluate(Object \2) { return \3; } }))').gsub(/(\w)\?\(/, '\1(').gsub('has_parent(', 'hasParent((Resource)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
118
|
+
transduction.gsub!('__', '__') unless line.strip.start_with?('@@')
|
119
|
+
transduction << ';' unless line.strip.empty?
|
83
120
|
end
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
transduction
|
91
|
-
transduction
|
121
|
+
|
122
|
+
if transduction and transduction.strip.start_with?('__') then
|
123
|
+
variable = transduction.scan(/__\w+/)[0]
|
124
|
+
map = {}
|
125
|
+
map = Hash[*transduction.sub(/^.* \{/, '').sub(/\};/, '').split(',').map { |assignment| assignment.split('=>').map { |kv| kv.strip } }.flatten.map { |function| if function.match(/'\w+'/) then "#{namespace}#{function.scan(/'\w+'/)[0].gsub(/'/, '')}" else nil end }.compact ] if transduction.match(/\{\s*\S+.*\}/)
|
126
|
+
transduction = " private static Map<Resource, Resource> #{variable} = _init_#{variable}();\n\n"
|
127
|
+
transduction << " private static Map<Resource, Resource> _init_#{variable}() {\n"
|
128
|
+
transduction << " Map<Resource, Resource> map = new HashMap<Resource, Resource>();\n\n"
|
129
|
+
map.each_pair { |key, value| transduction << " map.put(ResourceFactory.createResource(\"#{key}\"), ResourceFactory.createResource(\"#{value}\"));\n" }
|
130
|
+
transduction << "\n"
|
131
|
+
transduction << " return map;\n"
|
132
|
+
transduction << " }\n"
|
92
133
|
end
|
93
|
-
transduction.sub!(/\.equals\(("[^"]+")\)/, '.equals(ResourceFactory.createResource(\1))')
|
94
|
-
transduction.sub!(/if /, 'if (')
|
95
|
-
transduction.sub!(/$/, ') {')
|
96
|
-
elsif line.strip.start_with?('return [') then
|
97
|
-
transduction = line.sub(/return \[/, 'return new HashSet<Resource>(Arrays.asList(new Resource[] {').sub(/\]$/, '}));').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)")
|
98
|
-
elsif line.strip.start_with?('private') then
|
99
|
-
private_scope = true
|
100
|
-
transduction = " private static Resource _namespace_#{java_class}(String accession) {\n"
|
101
|
-
transduction << " if (isClass(ResourceFactory.createResource(\"#{namespace}\" + accession))) {\n"
|
102
|
-
transduction << " return ResourceFactory.createResource(\"#{namespace}\" + accession);\n"
|
103
|
-
transduction << " } else {\n"
|
104
|
-
transduction << " return ResourceFactory.createProperty(\"#{namespace}\" + accession);\n"
|
105
|
-
transduction << " }\n"
|
106
|
-
transduction << " }\n\n"
|
107
|
-
else
|
108
|
-
transduction = line.gsub('@@', '__').gsub(/RDF::URI\.new\(([^)]+)\)/, "_namespace_#{java_class}(\\1)").gsub(/(\w+)\.select ?\{ ?\|(\w+)\| ?(.*[^ ]) ?\}/, 'new HashSet<Resource>(CollectionUtils.select(\1, new Predicate() { public boolean evaluate(Object \2) { return \3; } }))').gsub(/(\w)\?\(/, '\1(').gsub('has_parent(', 'hasParent((Resource)').gsub(/\[([^\]]+)\]/, '.get(\1)')
|
109
|
-
transduction.gsub!('__', '__') unless line.strip.start_with?('@@')
|
110
|
-
transduction << ';' unless line.strip.empty?
|
111
|
-
end
|
112
134
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
transduction << " private static Map<Resource, Resource> _init_#{variable}() {\n"
|
119
|
-
transduction << " Map<Resource, Resource> map = new HashMap<Resource, Resource>();\n\n"
|
120
|
-
map.each_pair { |key, value| transduction << " map.put(ResourceFactory.createResource(\"#{key}\"), ResourceFactory.createResource(\"#{value}\"));\n" }
|
121
|
-
transduction << "\n"
|
122
|
-
transduction << " return map;\n"
|
123
|
-
transduction << " }\n"
|
124
|
-
end
|
135
|
+
cls << "#{transduction}\n".gsub(/'/, '"') if transduction
|
136
|
+
}
|
137
|
+
|
138
|
+
cls
|
139
|
+
end
|
125
140
|
|
126
|
-
|
141
|
+
unless @loaded_externally then
|
142
|
+
namespace = nil
|
127
143
|
|
128
|
-
|
144
|
+
if ARGV.length == 1 then
|
145
|
+
namespace = ARGV[0]
|
146
|
+
end
|
129
147
|
|
130
|
-
puts
|
148
|
+
puts javaify(STDIN, namespace)
|
149
|
+
end
|
131
150
|
|