publisci 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +36 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +68 -0
- data/bin/bio-publisci +106 -0
- data/bin/bio-publisci-server +50 -0
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +58 -0
- data/examples/no_magic.rb +58 -0
- data/examples/orm.prov +48 -0
- data/examples/primer-full.prov +120 -0
- data/examples/primer.prov +66 -0
- data/examples/prov_dsl.prov +85 -0
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/create_generator.feature +21 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/metadata.feature +37 -0
- data/features/metadata_steps.rb +40 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +61 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +14 -0
- data/features/writer_steps.rb +24 -0
- data/lib/bio-publisci.rb +64 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/datacube_model.rb +111 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +418 -0
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
- data/lib/bio-publisci/dataset/interactive.rb +72 -0
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
- data/lib/bio-publisci/dsl/dsl.rb +72 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +5 -0
- data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
- data/lib/bio-publisci/metadata/prov/association.rb +107 -0
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
- data/lib/bio-publisci/metadata/prov/element.rb +120 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
- data/lib/bio-publisci/metadata/prov/role.rb +40 -0
- data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +266 -0
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +123 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +49 -0
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +88 -0
- data/lib/bio-publisci/readers/dataframe.rb +67 -0
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +112 -0
- data/lib/bio-publisci/readers/r_matrix.rb +176 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +91 -0
- data/lib/bio-publisci/writers/base.rb +93 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak.rb +12 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/codes.rq +18 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +12 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/measures.rq +12 -0
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/observations.rq +13 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/resources/queries/properties.rq +8 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +28 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +25 -0
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +33 -0
- data/spec/ORM/prov_model_spec.rb +72 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +169 -0
- data/spec/dataset_for_spec.rb +77 -0
- data/spec/dsl_spec.rb +134 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +44 -0
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +74 -0
- data/spec/prov/agent_spec.rb +54 -0
- data/spec/prov/association_spec.rb +55 -0
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +52 -0
- data/spec/prov/role_spec.rb +94 -0
- data/spec/prov/usage_spec.rb +98 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +147 -0
- data/spec/turtle/reference +2064 -0
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +75 -0
- metadata +589 -0
data/spec/dsl_spec.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
# Integration specs for the PubliSci DSL: dataset / metadata / provenance
# declarations, generator options and repository output.
describe PubliSci::DSL do
  include PubliSci::DSL

  # Reset the Prov, Metadata and Dataset registries before every example.
  before(:each) do
    PubliSci::Prov.registry.clear
    PubliSci::Metadata.registry.clear
    PubliSci::Dataset.registry.clear
  end

  context "maf files" do
    describe "set options" do
      before { PubliSci::Dataset.register_reader('.maf', PubliSci::Readers::MAF) }

      it "can change output type" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        str = generate_n3
        str[/a qb:Observation/].should_not == nil
      end

      it "can output to repository" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        repo = to_repository
        repo.is_a?(RDF::Repository).should be true
        repo.size.should > 0

        qry = <<-EOF
          SELECT ?observation where {
            ?observation a <http://purl.org/linked-data/cube#Observation>;
              <http://onto.strinz.me/properties/Hugo_Symbol> ?node.

          }

        EOF

        sparql = SPARQL::Client.new(repo)
        sparql.query(qry).size.should > 0
      end
    end
  end

  # Uses the DSL blocks directly (the next example evaluates a script string);
  # the two examples previously carried the same description.
  it "can generate dataset, metadata, and provenance from DSL blocks" do
    dat = data do
      object 'spec/csv/bacon.csv'
    end
    met = metadata do
      name "Will"
    end
    prv = provenance do
      entity :a_thing
    end

    met.should_not be nil
    prv.should_not be nil
    dat.should_not be nil

    generate_n3.size.should > 0
  end

  # NOTE(review): the script references a CSV by remote URL, so this example
  # presumably needs network access - confirm.
  it "can generate dataset, metadata, and provenance when given a script" do
    ev = PubliSci::DSL::Instance.new
    dat = ev.instance_eval <<-EOF
      data do
        object 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
      end
    EOF
    dat.should_not be nil
    ev.generate_n3.size.should > 0
  end

  it "can set generator options" do
    data do
      object 'spec/csv/bacon.csv'
      option :no_labels, true
    end

    str = generate_n3
    str[/rdfs:label "\d"/].should == nil
  end

  it "can output to in-memory repository" do
    data do
      object 'spec/csv/bacon.csv'
    end

    repo = to_repository
    repo.is_a?(RDF::Repository).should be true
    repo.size.should > 0
  end

  it "can output to 4store repository", no_travis: true do
    configure do |cfg|
      cfg.repository = :fourstore
    end

    data do
      object 'spec/csv/bacon.csv'
    end

    # Size of the store before this example adds its triples.
    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end

  it "can output provenance to 4store", no_travis: true do
    ev = PubliSci::Prov::DSL::Instance.new
    str = IO.read('examples/primer-full.prov')
    ev.instance_eval(str,'examples/primer-full.prov')
    ev.instance_eval <<-EOF
      configure do |cfg|
        cfg.repository = :fourstore
      end
    EOF

    # Size of the store before this example adds its triples.
    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = ev.to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# require_relative '../../lib/r2rdf/data_cube.rb'
|
2
|
+
# require_relative '../../lib/r2rdf/generators/csv.rb'
|
3
|
+
require_relative '../../lib/bio-publisci.rb'
|
4
|
+
|
5
|
+
# require 'rdf/turtle'
|
6
|
+
require 'tempfile'
|
7
|
+
|
8
|
+
# Specs for the CSV reader: output is compared against a stored reference
# Turtle file, and the default dimension/measure selection is checked by
# querying the generated graph.
describe PubliSci::Readers::CSV do

  # Parses a Turtle string into an RDF::Graph by writing it to a temporary
  # file and loading that file.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      # Close and delete the temp file even when loading raises.
      f.close!
    end
  end

  before(:each) do
    @generator = PubliSci::Readers::CSV.new
  end

  context 'with reference CSV' do
    it "should generate correct output for reference file" do
      csv_path = File.dirname(__FILE__) + '/../csv/bacon.csv'
      options = {dimensions:["producer","pricerange"], label_column:0}

      turtle_string = @generator.generate_n3(csv_path,'bacon',options)
      ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
      turtle_string.should == ref
    end
  end

  it "selects first column as a coded dimension and creates measures from the rest by default" do
    turtle_string = @generator.generate_n3(File.dirname(__FILE__) + '/../csv/bacon.csv','bacon')
    graph = create_graph(turtle_string)
    qb = RDF::Vocabulary.new("http://purl.org/linked-data/cube#")

    dims = RDF::Query.execute(graph){ pattern [:dataset, qb.dimension, :dimension] }
    dims.size.should == 1
    dims.first[:dimension].to_s.should == "http://onto.strinz.me/properties/producer"

    measures = RDF::Query.execute(graph){ pattern [:dataset, qb.measure, :measure] }
    # Compare only the last path segment of each measure URI.
    measures.map{|s| s[:measure].to_s.split('/').last}.should == ["pricerange", "chunkiness", "deliciousness"]
  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
# Specs for the R dataframe reader. The examples talk to R through an Rserve
# connection and are tagged no_travis.
describe PubliSci::Readers::Dataframe do

  # Parses a Turtle string into an RDF::Graph by writing it to a temporary
  # file and loading that file.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      # Close and delete the temp file even when loading raises.
      f.close!
    end
  end

  context "with r/qtl dataframe", no_travis: true do
    # Runs the R setup once and caches both the R expression and the Turtle
    # generated from it for the examples below.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::Dataframe.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)
        mr = scanone(listeria,method="mr")
      EOF
      @rexp = @r.eval 'mr'
      @turtle = @generator.generate_n3(@rexp,'mr')
    end

    it "generates rdf from R dataframe" do
      turtle = @generator.generate_n3(@rexp,'mr')
      turtle.is_a?(String).should be true
    end

    it "creates correct graph according to reference file" do
      reference = IO.read(File.dirname(__FILE__) + '/../turtle/reference')
      @turtle.should eq reference
    end

    # No explicit expectation: the example only checks that generation with a
    # row_label option completes without raising.
    it "can optionally specify a row label" do
      @generator.generate_n3(@rexp,'mr',{row_label:"markers"})
    end
  end

end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# require_relative '../../lib/r2rdf/data_cube.rb'
|
2
|
+
# require_relative '../../lib/r2rdf/generators/csv.rb'
|
3
|
+
require_relative '../../lib/bio-publisci.rb'
|
4
|
+
|
5
|
+
# require 'rdf/turtle'
|
6
|
+
require 'tempfile'
|
7
|
+
|
8
|
+
# Specs for the MAF reader: the generated Turtle must contain the basic Data
# Cube building blocks, both for :print output and for :file output.
describe PubliSci::Readers::MAF do
  before(:each) do
    @generator = PubliSci::Readers::MAF.new
    @in_file = 'resources/maf_example.maf'
  end

  describe ".generate_n3" do
    # Asserts that +str+ contains an observation, a dimension property, a
    # measure property and a component specification.
    def is_cube(str)
      str[/a qb:Observation/].should_not be nil
      str[/a rdf:Property, qb:DimensionProperty/].should_not be nil
      str[/a rdf:Property, qb:MeasureProperty/].should_not be nil
      str[/a qb:ComponentSpecification/].should_not be nil
    end

    context "print output" do
      before { @str = @generator.generate_n3(@in_file, {output: :print})}
      it { is_cube(@str) }
    end

    context "file output" do
      before do
        base = Tempfile.new('graph')
        base.close
        # The generated Turtle is read back from "<output_base>.ttl".
        ttl_path = base.path + '.ttl'
        begin
          @generator.generate_n3(@in_file,{output: :file, output_base: base.path})
          @str = IO.read(ttl_path)
          # Also refreshes resources/maf_rdf.ttl with the freshly generated output.
          File.open('resources/maf_rdf.ttl','w'){|out| out.write @str}
        ensure
          # Remove both the generated .ttl and the temp file, even on failure.
          File.delete(ttl_path) if File.exist?(ttl_path)
          base.unlink
        end
      end

      it { is_cube(@str) }
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
# Specs for the R/qtl cross reader. The examples talk to R through an Rserve
# connection and are tagged no_travis.
describe PubliSci::Readers::RCross do

  # Parses a Turtle string into an RDF::Graph by writing it to a temporary
  # file and loading that file.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      # Close and delete the temp file even when loading raises.
      f.close!
    end
  end

  context "with reduced listeria cross", no_travis: true do
    # Builds `liscopy` in the R session: a copy of the listeria cross cut
    # down to the first two rows of each of its 20 genotype tables and of
    # the phenotype table.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::RCross.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)

        liscopy = listeria

        for(i in 1:20)
        liscopy$geno[[i]]$data <- as.matrix(liscopy$geno[[i]]$data[1:2,])

        liscopy$pheno <- liscopy$phen[1:2,]
      EOF
    end

    it "generators output to file by default", no_travis: true do
      f = Tempfile.new('cross')
      begin
        @generator.generate_n3(@r,'liscopy',f.path,{quiet: true})
        # Output is read back from "<base>_structure.ttl" and "<base>_1.ttl".
        turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_1.ttl")
        graph = create_graph(turtle_string)
        graph.size.should > 0
      ensure
        f.close!
      end
    end

    # Declared without a body, so RSpec reports it as pending. The disabled
    # body is kept for reference; note that it refers to @connection, which
    # this spec never sets (the connection here is @r).
    it "can generate string output", no_travis: true #do
    #   pending
    #   f=Tempfile.new('cross')
    #   turtle_string = @generator.generate_n3(@connection,'liscopy',f.path,{quiet: false, output: :string})

    #   graph = create_graph(turtle_string)
    #   graph.size.should > 0
    # end
  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# require_relative '../../lib/r2rdf/data_cube.rb'
|
2
|
+
# require_relative '../../lib/r2rdf/generators/r_matrix.rb'
|
3
|
+
# require 'rdf/turtle'
|
4
|
+
# require 'rserve'
|
5
|
+
require_relative '../../lib/bio-publisci.rb'
|
6
|
+
|
7
|
+
require 'tempfile'
|
8
|
+
|
9
|
+
# Specs for the R matrix reader. The examples talk to R through an Rserve
# connection and are tagged no_travis.
describe PubliSci::Readers::RMatrix do

  # Parses a Turtle string into an RDF::Graph by writing it to a temporary
  # file and loading that file.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      # Close and delete the temp file even when loading raises.
      f.close!
    end
  end

  before(:each) do
    @generator = PubliSci::Readers::RMatrix.new
    @connection = Rserve::Connection.new
  end

  it "generators a simple output automatically", no_travis: true do
    f = Tempfile.new('matrix')
    begin
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      @generator.generate_n3(@connection,'mat',f.path,{quiet: true})

      # Output is read back from "<base>_structure.ttl" and "<base>_0.ttl".
      turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_0.ttl")
      graph = create_graph(turtle_string)
      graph.size.should > 0
    ensure
      f.close!
    end
  end

  it "can generate string output", no_travis: true do
    f = Tempfile.new('matrix')
    begin
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      turtle_string = @generator.generate_n3(@connection,'mat',f.path,{quiet: true, output: :string})

      graph = create_graph(turtle_string)
      graph.size.should > 0
    ensure
      f.close!
    end
  end

end
|
File without changes
|
@@ -0,0 +1,343 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
#sparql = SPARQL::Client.new("#{repo.uri}/sparql/").query(qry)
|
4
|
+
|
5
|
+
# Query helper used by the specs in this file. It generates RDF from the
# example MAF file, runs SPARQL queries against the resulting repository and
# looks up gene information from remote SPARQL / REST services.
class MafQuery
  # Short names accepted by #select_property, mapped to the full predicate
  # (already wrapped in angle brackets) used in the generated queries.
  RESTRICTIONS = {
    patient: '<http://onto.strinz.me/properties/patient_id>',
    sample: '<http://onto.strinz.me/properties/sample_id>',
    gene: '<http://onto.strinz.me/properties/Hugo_Symbol>',
  }.freeze

  # Converts a query result into plain Ruby objects.
  #
  # * Integer, String and Symbol values are returned unchanged.
  # * RDF::Query::Solutions becomes an array with one entry per solution:
  #   the bare value when the solution has a single binding, otherwise a
  #   Hash of binding name => converted value.
  # * A single RDF::Query::Solution becomes its bare value when it has one
  #   binding, otherwise an array of [name, converted value] pairs.
  # * A one-element Array is unwrapped; longer arrays are converted
  #   element-wise.
  # * RDF::Literal becomes its Ruby object, RDF::URI its string form.
  # * Anything else is reported on stdout and converted with #to_s.
  #
  # Branch order matters: Solutions is tested before Array.
  # NOTE(review): presumably because RDF::Query::Solutions is itself an
  # Array - confirm against the RDF.rb version in use.
  #
  # @param solution [Object] value or query result to convert
  # @return [Object] plain Ruby representation
  def to_por(solution)
    case solution
    when Integer, String, Symbol
      # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and
      # no longer defined on current Rubies.
      solution
    when RDF::Query::Solutions
      solution.map do |sol|
        if sol.bindings.size == 1
          to_por(sol.bindings.first.last)
        else
          # Hash[] builds a Hash from [key, value] pairs; the bindings read
          # here are those of the individual solution, not the whole set.
          Hash[sol.bindings.map { |bind, result| [bind, to_por(result)] }]
        end
      end
    when RDF::Query::Solution
      if solution.bindings.size == 1
        to_por(solution.bindings.first.last)
      else
        solution.bindings.map { |bind, result| [bind, to_por(result)] }
      end
    when Array
      if solution.size == 1
        to_por(solution.first)
      else
        solution.map { |sol| to_por(sol) }
      end
    when RDF::Literal
      solution.object
    when RDF::URI
      solution.to_s
    else
      puts "don't recognize #{solution.class}"
      solution.to_s
    end
  end

  # Generates Turtle from resources/maf_example.maf into a temporary file
  # and loads it into an in-memory repository.
  #
  # @return [RDF::Repository] repository holding the generated triples
  def generate_data
    base = Tempfile.new('graph')
    base.close
    # The generated Turtle is loaded from "<output_base>.ttl".
    ttl_path = base.path + '.ttl'
    begin
      generator = PubliSci::Readers::MAF.new
      generator.generate_n3('resources/maf_example.maf', {output: :file, output_base: base.path})
      RDF::Repository.load(ttl_path)
    ensure
      # Remove both files even when generation or loading raises.
      File.delete(ttl_path) if File.exist?(ttl_path)
      base.unlink
    end
  end

  # Runs resources/queries/patient.rq for one patient and returns the
  # :barcodes binding of the first solution.
  #
  # @param repo [RDF::Repository] repository to query
  # @param patient_id [String] substituted for %{patient} in the query
  def select_patient_count(repo,patient_id="A8-A08G")
    qry = IO.read('resources/queries/patient.rq').gsub('%{patient}',patient_id)
    SPARQL.execute(qry,repo).first[:barcodes]
  end

  # Runs resources/queries/patient_list.rq and returns the raw result.
  #
  # @param repo [RDF::Repository] repository to query
  def patients(repo)
    SPARQL.execute(IO.read('resources/queries/patient_list.rq'),repo)
  end

  # Runs resources/queries/gene.rq for one patient and returns the raw result.
  #
  # @param repo [RDF::Repository] repository to query
  # @param patient_id [String] substituted for %{patient} in the query
  def select_patient_genes(repo,patient_id="A8-A08G")
    qry = IO.read('resources/queries/gene.rq').gsub('%{patient}',patient_id)
    SPARQL.execute(qry,repo)
  end

  # Selects one or more properties of observations, optionally restricted to
  # observations having given property values.
  #
  # Property and restriction names are looked up in RESTRICTIONS; unknown
  # names are used as the last path segment under
  # http://onto.strinz.me/properties/.
  #
  # @param repo [RDF::Repository] repository to query
  # @param property [String, Symbol, Array] name(s) to select; each is also
  #   used as the SPARQL variable name
  # @param restrictions [Hash] name => required value
  # @return [Object] the single solution when exactly one matches, otherwise
  #   the whole result
  def select_property(repo,property=["Hugo_Symbol"],restrictions={})
    selects = Array(property)

    targets = selects.map { |name| "\n #{predicate_for(name)} ?#{name} ;" }.join
    str = restrictions.map { |restrict, value|
      "\n #{predicate_for(restrict)} #{restriction_term(value)} ;"
    }.join

    qry = <<-EOF
      PREFIX qb: <http://purl.org/linked-data/cube#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX sio: <http://semanticscience.org/resource/>

      SELECT DISTINCT ?#{selects.join(" ?")} WHERE {
        ?obs a qb:Observation;
        #{str}
        #{targets}
        .
      }
    EOF

    results = SPARQL.execute(qry,repo)
    results.size == 1 ? results.first : results
  end

  # Collects the values attached to a node.
  #
  # For each (predicate, object) pair of +uri+: a SIO_000300 object is
  # returned as is; a SIO_000008 object is followed one level and its
  # SIO_000300 object returned; rdf:type pairs are dropped; every other
  # object is returned as is.
  #
  # @param repo [RDF::Repository] repository to query
  # @param uri [#to_s] node to describe
  # @return [Array] collected values
  def node_value(repo,uri)
    qry = "SELECT DISTINCT ?p ?o where { <#{uri.to_s}> ?p ?o}"
    SPARQL.execute(qry,repo).map { |sol|
      predicate = sol[:p].to_s
      if predicate == "http://semanticscience.org/resource/SIO_000300"
        sol[:o]
      elsif predicate == "http://semanticscience.org/resource/SIO_000008"
        nested_qry = "SELECT DISTINCT ?p ?o where { <#{sol[:o].to_s}> ?p ?o}"
        nested = SPARQL.execute(nested_qry,repo)
        nested.find { |inner| inner[:p].to_s == "http://semanticscience.org/resource/SIO_000300" }[:o]
      elsif predicate != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        sol[:o]
      end
    }.compact
  end

  # Looks up the approved HGNC symbol for a symbol or synonym on the remote
  # bio2rdf HGNC SPARQL endpoint.
  #
  # @param hugo_symbol [String] approved symbol or synonym
  # @return [String] first approved symbol found ("" when there is none)
  def official_symbol(hugo_symbol)
    qry = <<-EOF

      SELECT distinct ?official where {
        {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
        UNION
        {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}

        ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
      }

    EOF

    sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
    sparql.query(qry).map(&:official).first.to_s
  end

  # Computes a gene length as end - start of its Ensembl lookup record.
  #
  # The symbol (or the last path segment of a symbol URI) is first
  # normalised with #official_symbol, mapped to an Ensembl id with
  # resources/queries/hugo_to_ensembl.rq, then looked up on the Ensembl
  # REST service.
  #
  # @param hugo_symbol [String] gene symbol or symbol URI
  # @return [Integer] js['end'] - js['start'] of the lookup response
  # @raise [RuntimeError] when no Ensembl entry is found or the REST call
  #   does not answer with status 200
  def gene_length(hugo_symbol)
    hugo_symbol = official_symbol(hugo_symbol.split('/').last)
    qry = IO.read('resources/queries/hugo_to_ensembl.rq').gsub('%{hugo_symbol}',hugo_symbol)
    sol = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql").query(qry)

    raise "No Ensembl entry found for #{hugo_symbol}" if sol.size == 0
    ensembl_id = sol.map(&:ensembl).first.to_s.split(':').last

    url = URI.parse('http://beta.rest.ensembl.org/')
    http = Net::HTTP.new(url.host, url.port)
    request = Net::HTTP::Get.new('/lookup/id/' + ensembl_id + '?format=full', {'Content-Type' => 'application/json'})
    response = http.request(request)

    raise "Invalid response: #{response.code}" if response.code != "200"
    js = JSON.parse(response.body)
    js['end'] - js['start']
  end

  # Placeholder: intentionally has no implementation.
  def derive_gene_lengths

  end

  # Builds a report for one patient.
  #
  # @param id [String] patient id
  # @param repo [RDF::Repository] repository to query
  # @return [Hash] :patient_id, :mutation_count and :mutations (one
  #   {symbol:, length:} hash per mutated gene)
  def patient_info(id,repo)
    symbols = Array(to_por(select_property(repo,"Hugo_Symbol",patient: id)))
    patient = {patient_id: id, mutation_count: symbols.size, mutations:[]}

    symbols.each{|sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)}}
    patient
  end

  # Builds a report for one gene from
  # resources/queries/patients_with_mutation.rq.
  #
  # @param hugo_symbol [String] gene symbol
  # @param repo [RDF::Repository] repository to query
  # @return [Hash] :mutations (number of solutions), :gene_length and
  #   :patients (patient ids as strings)
  def gene_info(hugo_symbol,repo)
    qry = IO.read('resources/queries/patients_with_mutation.rq').gsub('%{hugo_symbol}',hugo_symbol)
    sols = SPARQL.execute(qry,repo)
    {mutations: sols.size, gene_length: gene_length(hugo_symbol), patients: sols.map(&:patient_id).map(&:to_s)}
  end

  private

  # Full predicate for a short property name: the RESTRICTIONS entry when
  # there is one, otherwise the name under the default property namespace.
  def predicate_for(name)
    RESTRICTIONS[name.to_sym] || "<http://onto.strinz.me/properties/#{name}>"
  end

  # Renders a restriction value as a SPARQL term. Non-strings are inserted
  # as they are. A string that is a valid RDF resource is inserted verbatim,
  # or in its base (angle-bracket) form when it contains "http://"; any
  # other string is wrapped in double quotes.
  def restriction_term(value)
    return value unless value.is_a? String

    resource = RDF::Resource(value)
    if resource.valid?
      value[/http:\/\//] ? resource.to_base : value
    else
      '"' + value + '"'
    end
  end
end
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
# Specs for MafQuery, run against a repository generated once from the
# example MAF file. Examples under "remote service calls" are tagged
# no_travis.
describe MafQuery do
  before(:all) do
    @maf = MafQuery.new
    @repo = @maf.generate_data
  end

  # Patient id shared by most examples below.
  def patient_id
    "BH-A0HP"
  end

  describe "query genes" do
    it { @maf.select_patient_genes(@repo,patient_id).size.should > 0 }
  end

  describe "query number of entries" do
    it { @maf.select_patient_count(@repo,patient_id).should > 0 }
  end

  describe ".patients" do
    it "retrieves a list of patients" do
      @maf.to_por(@maf.patients(@repo)).first.should == "E9-A22B"
    end
  end

  describe ".select_property" do
    it { @maf.to_por(@maf.select_property(@repo,"Hugo_Symbol", patient: patient_id)).should == "http://identifiers.org/hgnc.symbol/A1CF" }
    it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: patient_id)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
    it { @maf.select_property(@repo,"Center",patient: patient_id)[:Center].to_s.should == "genome.wustl.edu" }
    it { @maf.select_property(@repo,"NCBI_Build",patient: patient_id)[:NCBI_Build].to_i.should == 37 }

    context "extra parsed properties" do
      it { @maf.select_property(@repo,"sample_id",patient: patient_id)[:sample_id].should == "01A-12D-A099-09" }
      it { @maf.select_property(@repo,"patient_id",patient: patient_id)[:patient_id].should == "BH-A0HP" }
    end

    context "multiple restrictions" do
      it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: patient_id, :Chromosome => 10)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: patient_id, :Chromosome => 2).should == [] }
    end

    context "multiple selections" do
      it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: patient_id)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: patient_id)[:Hugo_Symbol].to_s.should == 'http://identifiers.org/hgnc.symbol/A1CF' }
    end

    context "non-existent properties" do
      it { @maf.select_property(@repo,"Chunkiness",patient: patient_id).should == [] }
    end
  end

  context "remote service calls", no_travis: true do
    describe ".gene_length" do
      it { @maf.gene_length('A2BP1').should == 1694245 }
    end

    # Disabled example, kept for reference:
    # describe ".official_symbol" do
    #   it { @maf.official_symbol('A2BP1').should == 'RBFOX1' }
    # end

    describe ".gene_info" do
      it 'collects the number of mutations and gene lengths for each mutation' do
        gene = @maf.gene_info('A1BG',@repo)
        gene[:mutations].should == 2
        gene[:gene_length].should == 8321
        gene[:patients].first.should == "E9-A22B"
      end
    end

    describe ".patient_info" do
      it 'collects the number of patients with a mutation in a gene and its length' do
        patient = @maf.patient_info(patient_id,@repo)
        patient[:mutation_count].should == 1
        patient[:mutations].first[:length].should == 79113
        patient[:mutations].first[:symbol].should == 'http://identifiers.org/hgnc.symbol/A1CF'
      end
    end
  end
end
|
300
|
+
|
301
|
+
# Small facade over MafQuery intended to be driven through instance_eval:
# it holds one MafQuery and one repository and exposes `select`,
# `gene_length` and `report_for`.
class QueryScript
  # @param repo [RDF::Repository, nil] repository to query; when omitted
  #   (or falsy) a fresh one is generated through MafQuery#generate_data
  def initialize(repo=nil)
    @__maf = MafQuery.new
    @__repo = repo || @__maf.generate_data
  end

  # Runs MafQuery#select_<operation> when such a method exists; otherwise
  # treats +operation+ as a property name for MafQuery#select_property.
  # The result is converted with MafQuery#to_por in both cases.
  #
  # @param operation [String, Symbol] select_* suffix or property name
  # @param args extra arguments forwarded after the repository
  def select(operation,*args)
    specialised = :"select_#{operation}"
    raw =
      if @__maf.methods.include?(specialised)
        @__maf.send(specialised, @__repo, *args)
      else
        @__maf.select_property(@__repo, operation, *args)
      end
    @__maf.to_por(raw)
  end

  # Gene length as computed by MafQuery#gene_length, passed through to_por.
  def gene_length(gene)
    length = @__maf.gene_length(gene)
    @__maf.to_por(length)
  end

  # Dispatches to MafQuery#<type>_info with the id and the repository.
  #
  # @param type [String, Symbol] prefix of the *_info method to call
  # @param id [String] identifier handed to that method
  def report_for(type, id)
    reporter = :"#{type}_info"
    @__maf.send(reporter, id, @__repo)
  end
end
|
327
|
+
|
328
|
+
# Specs for QueryScript#select and #report_for, called directly and through
# instance_eval of a script string.
describe QueryScript do
  describe ".select" do
    before(:all) do
      @ev = QueryScript.new
    end

    it { @ev.select('patient_count', "BH-A0HP").should > 0 }

    context "with instance_eval" do
      it { @ev.instance_eval("select 'patient_count', patient: 'BH-A0HP'").should > 0 }
      it { @ev.instance_eval("select 'Hugo_Symbol', patient: 'BH-A0HP'").should == 'http://identifiers.org/hgnc.symbol/A1CF' }
      # Integer rather than Fixnum: Fixnum is deprecated since Ruby 2.4 and
      # no longer defined on current Rubies.
      it { @ev.instance_eval("select 'Chromosome', patient: 'BH-A0HP'").is_a?(Integer).should be true }
      it { @ev.instance_eval("report_for 'patient', 'BH-A0HP'").is_a?(Hash).should be true }
    end
  end
end
|