bio-publisci 0.0.8 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +10 -0
- data/Rakefile +1 -1
- data/bin/bio-publisci-server +50 -0
- data/features/reader_steps.rb +1 -1
- data/lib/bio-publisci.rb +11 -2
- data/lib/bio-publisci/datacube_model.rb +92 -88
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
- data/lib/bio-publisci/dataset/data_cube.rb +78 -44
- data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
- data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
- data/lib/bio-publisci/output.rb +1 -1
- data/lib/bio-publisci/parser.rb +130 -12
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +13 -8
- data/lib/bio-publisci/readers/arff.rb +1 -1
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +2 -5
- data/lib/bio-publisci/readers/dataframe.rb +2 -2
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +6 -10
- data/lib/bio-publisci/readers/r_matrix.rb +1 -1
- data/lib/bio-publisci/writers/base.rb +16 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +1 -1
- data/scripts/scan_islet.rb +1 -1
- data/scripts/update_reference.rb +8 -3
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +1 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/data_cube_spec.rb +66 -63
- data/spec/dataset_for_spec.rb +36 -16
- data/spec/dsl_spec.rb +41 -0
- data/spec/generators/csv_spec.rb +3 -3
- data/spec/generators/dataframe_spec.rb +2 -2
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +2 -2
- data/spec/generators/r_matrix_spec.rb +2 -2
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/resource/example.Rhistory +1 -1
- data/spec/turtle/bacon +9 -9
- data/spec/turtle/reference +43 -43
- data/spec/turtle/weather +10 -10
- data/spec/writer_spec.rb +16 -2
- metadata +212 -61
data/spec/data_cube_spec.rb
CHANGED
@@ -52,12 +52,14 @@ describe PubliSci::Dataset::DataCube do
|
|
52
52
|
|
53
53
|
it "skips observations with missing values by default" do
|
54
54
|
turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
55
|
+
turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
|
56
|
+
turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
|
57
|
+
end
|
58
|
+
|
59
|
+
it "includes observations with missing values if flag is set" do
|
60
|
+
turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
|
61
|
+
turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
|
62
|
+
turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
|
61
63
|
end
|
62
64
|
|
63
65
|
end
|
@@ -102,65 +104,66 @@ describe PubliSci::Dataset::DataCube do
|
|
102
104
|
observations.is_a?(Array).should == true
|
103
105
|
observations.first.is_a?(String).should == true
|
104
106
|
end
|
105
|
-
end
|
106
107
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
@checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
it 'obeys IC-1, has a unique dataset for each observation' do
|
119
|
-
SPARQL.execute(@checks['1'], @graph).first.should be_nil
|
120
|
-
end
|
121
|
-
|
122
|
-
it 'obeys IC-2, has a unique data structure definition of each dataset' do
|
123
|
-
SPARQL.execute(@checks['2'], @graph).first.should be_nil
|
124
|
-
end
|
125
|
-
|
126
|
-
it 'obeys IC-3, has a measure property specified for each dataset' do
|
127
|
-
SPARQL.execute(@checks['3'], @graph).first.should be_nil
|
128
|
-
end
|
129
|
-
|
130
|
-
it 'obeys IC-4, specifies a range for all dimensions' do
|
131
|
-
SPARQL.execute(@checks['4'], @graph).first.should be_nil
|
132
|
-
end
|
133
|
-
|
134
|
-
it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
|
135
|
-
SPARQL.execute(@checks['5'], @graph).first.should be_nil
|
136
|
-
end
|
137
|
-
|
138
|
-
it 'obeys IC-11, has a value for each dimension in every observation' do
|
139
|
-
SPARQL.execute(@checks['11'], @graph).first.should be_nil
|
140
|
-
end
|
141
|
-
|
142
|
-
## currently locks up. possible bug in SPARQL gem parsing?
|
143
|
-
## works fine as a raw query
|
144
|
-
# it 'obeys IC-12, has do duplicate observations' do
|
145
|
-
# SPARQL.execute(@checks['12'], @graph).first.should be_nil
|
146
|
-
# end
|
147
|
-
|
148
|
-
it 'obeys IC-14, has a value for each measure in every observation' do
|
149
|
-
SPARQL.execute(@checks['14'], @graph).first.should be_nil
|
150
|
-
end
|
151
|
-
|
152
|
-
it 'obeys IC-19, all codes for each codeList are included' do
|
153
|
-
SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
|
154
|
-
## second query for IC-19 uses property paths that aren't as easy to
|
155
|
-
## convert to sparql 1.0, so for now I've left it out
|
156
|
-
# SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
|
157
|
-
end
|
108
|
+
it "coerces single values into arrays" do
|
109
|
+
newdata = Hash[@data.map{|k,v| [k,v.first] }]
|
110
|
+
observations = @generator.observations(@measures, @dimensions, @codes, newdata, @labels[0], "bacon")
|
111
|
+
observations.is_a?(Array).should == true
|
112
|
+
observations.first.is_a?(String).should == true
|
113
|
+
end
|
158
114
|
end
|
159
115
|
|
160
116
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
117
|
+
context "under official integrity constraints" do
|
118
|
+
before(:all) do
|
119
|
+
@graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
|
120
|
+
@checks = {}
|
121
|
+
Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
|
122
|
+
if file.split('.').last == 'rq'
|
123
|
+
@checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'obeys IC-1, has a unique dataset for each observation' do
|
129
|
+
SPARQL.execute(@checks['1'], @graph).first.should be_nil
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'obeys IC-2, has a unique data structure definition of each dataset' do
|
133
|
+
SPARQL.execute(@checks['2'], @graph).first.should be_nil
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'obeys IC-3, has a measure property specified for each dataset' do
|
137
|
+
SPARQL.execute(@checks['3'], @graph).first.should be_nil
|
138
|
+
end
|
139
|
+
|
140
|
+
it 'obeys IC-4, specifies a range for all dimensions' do
|
141
|
+
SPARQL.execute(@checks['4'], @graph).first.should be_nil
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
|
145
|
+
SPARQL.execute(@checks['5'], @graph).first.should be_nil
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'obeys IC-11, has a value for each dimension in every observation' do
|
149
|
+
SPARQL.execute(@checks['11'], @graph).first.should be_nil
|
150
|
+
end
|
151
|
+
|
152
|
+
## currently locks up. possible bug in SPARQL gem parsing?
|
153
|
+
## works fine as a raw query
|
154
|
+
# it 'obeys IC-12, has do duplicate observations' do
|
155
|
+
# SPARQL.execute(@checks['12'], @graph).first.should be_nil
|
156
|
+
# end
|
157
|
+
|
158
|
+
it 'obeys IC-14, has a value for each measure in every observation' do
|
159
|
+
SPARQL.execute(@checks['14'], @graph).first.should be_nil
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'obeys IC-19, all codes for each codeList are included' do
|
163
|
+
SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
|
164
|
+
## second query for IC-19 uses property paths that aren't as easy to
|
165
|
+
## convert to sparql 1.0, so for now I've left it out
|
166
|
+
# SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
|
167
|
+
end
|
168
|
+
end
|
166
169
|
end
|
data/spec/dataset_for_spec.rb
CHANGED
@@ -1,6 +1,40 @@
|
|
1
1
|
require_relative '../lib/bio-publisci.rb'
|
2
2
|
|
3
3
|
describe PubliSci::Dataset do
|
4
|
+
it "should use sio:has_value for unknown string types" do
|
5
|
+
pending("pending refactor dataset_for to handle raw remote files better")
|
6
|
+
turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
|
7
|
+
(turtle_string =~ /hasValue/).should_not be nil
|
8
|
+
# open('ttl.ttl','w'){|f| f.write turtle_string}
|
9
|
+
repo = RDF::Repository.new
|
10
|
+
|
11
|
+
f = Tempfile.new(['repo','.ttl'])
|
12
|
+
f.write(turtle_string)
|
13
|
+
f.close
|
14
|
+
repo.load(f.path, :format => :ttl)
|
15
|
+
f.unlink
|
16
|
+
|
17
|
+
repo.size.should > 0
|
18
|
+
end
|
19
|
+
|
20
|
+
it "can convert arff files" do
|
21
|
+
turtle_string = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
|
22
|
+
turtle_string.should == IO.read('spec/turtle/weather')
|
23
|
+
end
|
24
|
+
|
25
|
+
describe ".register_reader" do
|
26
|
+
it "can register readers to be used by Dataset.for" do
|
27
|
+
PubliSci::Dataset.reader_registry.clear
|
28
|
+
expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
|
29
|
+
PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
|
30
|
+
file = PubliSci::Dataset.for('resources/maf_example.maf')
|
31
|
+
str = IO.read(file)
|
32
|
+
File.delete(file.path)
|
33
|
+
str.size.should > 0
|
34
|
+
(str =~ /qb:Observation/).should_not be nil
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
4
38
|
context 'with a csv file' do
|
5
39
|
before(:all) do
|
6
40
|
@file = File.dirname(__FILE__) + '/csv/bacon.csv'
|
@@ -11,21 +45,6 @@ describe PubliSci::Dataset do
|
|
11
45
|
(turtle_string =~ /qb:Observation/).should_not be nil
|
12
46
|
end
|
13
47
|
|
14
|
-
it "should use sio:has_value for unknown string types" do
|
15
|
-
turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
|
16
|
-
(turtle_string =~ /hasValue/).should_not be nil
|
17
|
-
# open('ttl.ttl','w'){|f| f.write turtle_string}
|
18
|
-
repo = RDF::Repository.new
|
19
|
-
|
20
|
-
f = Tempfile.new(['repo','.ttl'])
|
21
|
-
f.write(turtle_string)
|
22
|
-
f.close
|
23
|
-
repo.load(f.path, :format => :ttl)
|
24
|
-
f.unlink
|
25
|
-
|
26
|
-
repo.size.should > 0
|
27
|
-
end
|
28
|
-
|
29
48
|
it "will download remote files" do
|
30
49
|
turtle_string = PubliSci::Dataset.for('https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv',false)
|
31
50
|
(turtle_string =~ /prop:pricerange/).should_not be nil
|
@@ -33,7 +52,7 @@ describe PubliSci::Dataset do
|
|
33
52
|
end
|
34
53
|
|
35
54
|
it "will request user input if not provided" do
|
36
|
-
gen = PubliSci::
|
55
|
+
gen = PubliSci::Readers::CSV.new
|
37
56
|
gen.stub(:gets).and_return('pricerange,producer')
|
38
57
|
gen.stub(:puts)
|
39
58
|
turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
|
@@ -54,4 +73,5 @@ describe PubliSci::Dataset do
|
|
54
73
|
(turtle_string =~ /prop:producer/).should_not be nil
|
55
74
|
end
|
56
75
|
end
|
76
|
+
|
57
77
|
end
|
data/spec/dsl_spec.rb
CHANGED
@@ -9,6 +9,45 @@ describe PubliSci::DSL do
|
|
9
9
|
PubliSci::Dataset.registry.clear
|
10
10
|
end
|
11
11
|
|
12
|
+
context "maf files" do
|
13
|
+
describe "set options" do
|
14
|
+
before { PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF) }
|
15
|
+
it "can change output type" do
|
16
|
+
|
17
|
+
dat = data do
|
18
|
+
object 'resources/maf_example.maf'
|
19
|
+
option :output, :print
|
20
|
+
end
|
21
|
+
|
22
|
+
str = generate_n3
|
23
|
+
str[/a qb:Observation/].should_not == nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it "can output to repository" do
|
27
|
+
dat = data do
|
28
|
+
object 'resources/maf_example.maf'
|
29
|
+
option :output, :print
|
30
|
+
end
|
31
|
+
|
32
|
+
repo = to_repository
|
33
|
+
repo.is_a?(RDF::Repository).should be true
|
34
|
+
repo.size.should > 0
|
35
|
+
|
36
|
+
qry = <<-EOF
|
37
|
+
SELECT ?observation where {
|
38
|
+
?observation a <http://purl.org/linked-data/cube#Observation>;
|
39
|
+
<http://onto.strinz.me/properties/Hugo_Symbol> ?node.
|
40
|
+
|
41
|
+
}
|
42
|
+
|
43
|
+
EOF
|
44
|
+
|
45
|
+
sparql = SPARQL::Client.new(repo)
|
46
|
+
sparql.query(qry).size.should > 0
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
12
51
|
it "can generate dataset, metadata, and provenance when given a script" do
|
13
52
|
|
14
53
|
dat = data do
|
@@ -49,6 +88,8 @@ describe PubliSci::DSL do
|
|
49
88
|
str[/rdfs:label "\d"/].should == nil
|
50
89
|
end
|
51
90
|
|
91
|
+
|
92
|
+
|
52
93
|
it "can output to in-memory repository" do
|
53
94
|
dat = data do
|
54
95
|
object 'spec/csv/bacon.csv'
|
data/spec/generators/csv_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require_relative '../../lib/bio-publisci.rb'
|
|
5
5
|
# require 'rdf/turtle'
|
6
6
|
require 'tempfile'
|
7
7
|
|
8
|
-
describe PubliSci::
|
8
|
+
describe PubliSci::Readers::CSV do
|
9
9
|
|
10
10
|
def create_graph(turtle_string)
|
11
11
|
f = Tempfile.new('graph')
|
@@ -17,7 +17,7 @@ describe PubliSci::Reader::CSV do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
before(:each) do
|
20
|
-
@generator = PubliSci::
|
20
|
+
@generator = PubliSci::Readers::CSV.new
|
21
21
|
end
|
22
22
|
|
23
23
|
context 'with reference CSV' do
|
@@ -35,7 +35,7 @@ describe PubliSci::Reader::CSV do
|
|
35
35
|
|
36
36
|
dims = RDF::Query.execute(graph){ pattern [:dataset, qb.dimension, :dimension] }
|
37
37
|
dims.size.should == 1
|
38
|
-
dims.first[:dimension].to_s.should == "http://
|
38
|
+
dims.first[:dimension].to_s.should == "http://onto.strinz.me/properties/producer"
|
39
39
|
|
40
40
|
measures = RDF::Query.execute(graph){ pattern [:dataset, qb.measure, :measure] }
|
41
41
|
measures.map{|s| s[:measure].to_s.split('/').last}.should == ["pricerange", "chunkiness", "deliciousness"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative '../../lib/bio-publisci.rb'
|
2
2
|
|
3
|
-
describe PubliSci::
|
3
|
+
describe PubliSci::Readers::Dataframe do
|
4
4
|
|
5
5
|
def create_graph(turtle_string)
|
6
6
|
f = Tempfile.new('graph')
|
@@ -14,7 +14,7 @@ describe PubliSci::Reader::Dataframe do
|
|
14
14
|
context "with r/qtl dataframe", no_travis: true do
|
15
15
|
before(:all) do
|
16
16
|
@r = Rserve::Connection.new
|
17
|
-
@generator = PubliSci::
|
17
|
+
@generator = PubliSci::Readers::Dataframe.new
|
18
18
|
@r.eval <<-EOF
|
19
19
|
library(qtl)
|
20
20
|
data(listeria)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# require_relative '../../lib/r2rdf/data_cube.rb'
|
2
|
+
# require_relative '../../lib/r2rdf/generators/csv.rb'
|
3
|
+
require_relative '../../lib/bio-publisci.rb'
|
4
|
+
|
5
|
+
# require 'rdf/turtle'
|
6
|
+
require 'tempfile'
|
7
|
+
|
8
|
+
describe PubliSci::Readers::MAF do
|
9
|
+
before(:each) do
|
10
|
+
@generator = PubliSci::Readers::MAF.new
|
11
|
+
@in_file = 'resources/maf_example.maf'
|
12
|
+
end
|
13
|
+
|
14
|
+
describe ".generate_n3" do
|
15
|
+
def is_cube(str)
|
16
|
+
str[/a qb:Observation/].should_not be nil
|
17
|
+
str[/a rdf:Property, qb:DimensionProperty/].should_not be nil
|
18
|
+
str[/a rdf:Property, qb:MeasureProperty/].should_not be nil
|
19
|
+
str[/a qb:ComponentSpecification/].should_not be nil
|
20
|
+
end
|
21
|
+
|
22
|
+
context "print output" do
|
23
|
+
before { @str = @generator.generate_n3(@in_file, {output: :print})}
|
24
|
+
it { is_cube(@str) }
|
25
|
+
end
|
26
|
+
|
27
|
+
context "file output" do
|
28
|
+
before {
|
29
|
+
f = Tempfile.new('graph')
|
30
|
+
f.close
|
31
|
+
@generator.generate_n3(@in_file,{output: :file, output_base: f.path})
|
32
|
+
@str = IO.read(f.path+'.ttl')
|
33
|
+
open('resources/maf_rdf.ttl','w'){|f| f.write @str}
|
34
|
+
f.unlink
|
35
|
+
}
|
36
|
+
|
37
|
+
it { is_cube(@str) }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -2,7 +2,7 @@ require_relative '../../lib/bio-publisci.rb'
|
|
2
2
|
|
3
3
|
require 'tempfile'
|
4
4
|
|
5
|
-
describe PubliSci::
|
5
|
+
describe PubliSci::Readers::RCross do
|
6
6
|
|
7
7
|
def create_graph(turtle_string)
|
8
8
|
f = Tempfile.new('graph')
|
@@ -16,7 +16,7 @@ describe PubliSci::Reader::RCross do
|
|
16
16
|
context "with reduced listeria cross", no_travis: true do
|
17
17
|
before(:all) do
|
18
18
|
@r = Rserve::Connection.new
|
19
|
-
@generator = PubliSci::
|
19
|
+
@generator = PubliSci::Readers::RCross.new
|
20
20
|
@r.eval <<-EOF
|
21
21
|
library(qtl)
|
22
22
|
data(listeria)
|
@@ -6,7 +6,7 @@ require_relative '../../lib/bio-publisci.rb'
|
|
6
6
|
|
7
7
|
require 'tempfile'
|
8
8
|
|
9
|
-
describe PubliSci::
|
9
|
+
describe PubliSci::Readers::RMatrix do
|
10
10
|
|
11
11
|
def create_graph(turtle_string)
|
12
12
|
f = Tempfile.new('graph')
|
@@ -18,7 +18,7 @@ describe PubliSci::Reader::RMatrix do
|
|
18
18
|
end
|
19
19
|
|
20
20
|
before(:each) do
|
21
|
-
@generator = PubliSci::
|
21
|
+
@generator = PubliSci::Readers::RMatrix.new
|
22
22
|
@connection = Rserve::Connection.new
|
23
23
|
end
|
24
24
|
|
File without changes
|
@@ -0,0 +1,343 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
#sparql = SPARQL::Client.new("#{repo.uri}/sparql/").query(qry)
|
4
|
+
|
5
|
+
class MafQuery
|
6
|
+
RESTRICTIONS = {
|
7
|
+
patient: '<http://onto.strinz.me/properties/patient_id>',
|
8
|
+
sample: '<http://onto.strinz.me/properties/sample_id>',
|
9
|
+
gene: '<http://onto.strinz.me/properties/Hugo_Symbol>',
|
10
|
+
}
|
11
|
+
|
12
|
+
def to_por(solution)
|
13
|
+
if solution.is_a?(Fixnum) or solution.is_a?(String) or solution.is_a?(Symbol)
|
14
|
+
solution
|
15
|
+
elsif solution.is_a? RDF::Query::Solutions
|
16
|
+
solution.map{|sol|
|
17
|
+
if sol.bindings.size == 1
|
18
|
+
to_por(sol.bindings.first.last)
|
19
|
+
else
|
20
|
+
Hash(solution.bindings.map{|bind,result| [bind,to_por(result)]})
|
21
|
+
end
|
22
|
+
}
|
23
|
+
elsif solution.is_a? RDF::Query::Solution
|
24
|
+
if solution.bindings.size == 1
|
25
|
+
to_por(solution.bindings.first.last)
|
26
|
+
else
|
27
|
+
solution.bindings.map{|bind,result| [bind,to_por(result)] }
|
28
|
+
end
|
29
|
+
elsif solution.is_a? Array
|
30
|
+
if solution.size == 1
|
31
|
+
to_por(solution.first)
|
32
|
+
else
|
33
|
+
solution.map{|sol| to_por(sol)}
|
34
|
+
end
|
35
|
+
else
|
36
|
+
if solution.is_a? RDF::Literal
|
37
|
+
solution.object
|
38
|
+
elsif solution.is_a? RDF::URI
|
39
|
+
solution.to_s
|
40
|
+
else
|
41
|
+
puts "don't recognzize #{solution.class}"
|
42
|
+
solution.to_s
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def generate_data
|
48
|
+
generator = PubliSci::Readers::MAF.new
|
49
|
+
in_file = 'resources/maf_example.maf'
|
50
|
+
f = Tempfile.new('graph')
|
51
|
+
f.close
|
52
|
+
generator.generate_n3(in_file, {output: :file, output_base: f.path})
|
53
|
+
repo = RDF::Repository.load(f.path+'.ttl')
|
54
|
+
File.delete(f.path+'.ttl')
|
55
|
+
f.unlink
|
56
|
+
repo
|
57
|
+
end
|
58
|
+
|
59
|
+
def select_patient_count(repo,patient_id="A8-A08G")
|
60
|
+
qry = IO.read('resources/queries/patient.rq')
|
61
|
+
qry = qry.gsub('%{patient}',patient_id)
|
62
|
+
SPARQL.execute(qry,repo).first[:barcodes]
|
63
|
+
end
|
64
|
+
|
65
|
+
def patients(repo)
|
66
|
+
qry = IO.read('resources/queries/patient_list.rq')
|
67
|
+
SPARQL.execute(qry,repo) #.map(&:id).map(&:to_s)
|
68
|
+
end
|
69
|
+
|
70
|
+
def select_patient_genes(repo,patient_id="A8-A08G")
|
71
|
+
qry = IO.read('resources/queries/gene.rq')
|
72
|
+
qry = qry.gsub('%{patient}',patient_id)
|
73
|
+
SPARQL.execute(qry,repo)
|
74
|
+
end
|
75
|
+
|
76
|
+
def select_property(repo,property=["Hugo_Symbol"],restrictions={})
|
77
|
+
# qry = IO.read('resources/queries/maf_column.rq').gsub('%{patient}',patient_id).gsub('%{column}',property)
|
78
|
+
property = Array(property)
|
79
|
+
selects = property
|
80
|
+
property = property.map{|prop|
|
81
|
+
RESTRICTIONS[prop.to_sym] || "<http://onto.strinz.me/properties/#{prop}>"
|
82
|
+
}
|
83
|
+
|
84
|
+
targets = ""
|
85
|
+
property.each_with_index{|p,i|
|
86
|
+
targets << "\n #{p} ?#{selects[i]} ;"
|
87
|
+
}
|
88
|
+
|
89
|
+
str = ""
|
90
|
+
restrictions.each{|restrict,value|
|
91
|
+
prop = RESTRICTIONS[restrict.to_sym] || "<http://onto.strinz.me/properties/#{restrict}>"
|
92
|
+
if value.is_a? String
|
93
|
+
if RDF::Resource(value).valid?
|
94
|
+
if(value[/http:\/\//])
|
95
|
+
value = RDF::Resource(value).to_base
|
96
|
+
end
|
97
|
+
else
|
98
|
+
value = '"' + value + '"'
|
99
|
+
end
|
100
|
+
end
|
101
|
+
str << "\n #{prop} #{value} ;"
|
102
|
+
}
|
103
|
+
|
104
|
+
|
105
|
+
qry = <<-EOF
|
106
|
+
PREFIX qb: <http://purl.org/linked-data/cube#>
|
107
|
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
108
|
+
PREFIX sio: <http://semanticscience.org/resource/>
|
109
|
+
|
110
|
+
SELECT DISTINCT ?#{selects.join(" ?")} WHERE {
|
111
|
+
?obs a qb:Observation;
|
112
|
+
#{str}
|
113
|
+
#{targets}
|
114
|
+
.
|
115
|
+
}
|
116
|
+
EOF
|
117
|
+
|
118
|
+
results = SPARQL.execute(qry,repo)
|
119
|
+
# results = results.map{ |solution|
|
120
|
+
# solution.bindings.map{ |bind,result| [bind, result]}
|
121
|
+
|
122
|
+
# # .map(&:column).map{|val|
|
123
|
+
# # if val.is_a?(RDF::URI) and val.to_s["node"]
|
124
|
+
# # node_value(repo,val)
|
125
|
+
# # else
|
126
|
+
# # val
|
127
|
+
# # end
|
128
|
+
|
129
|
+
# }.flatten
|
130
|
+
|
131
|
+
if results.size == 1
|
132
|
+
results.first
|
133
|
+
else
|
134
|
+
results
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
def node_value(repo,uri)
|
139
|
+
qry = "SELECT DISTINCT ?p ?o where { <#{uri.to_s}> ?p ?o}"
|
140
|
+
SPARQL.execute(qry,repo).map{|sol|
|
141
|
+
if sol[:p].to_s == "http://semanticscience.org/resource/SIO_000300"
|
142
|
+
sol[:o]
|
143
|
+
elsif sol[:p].to_s == "http://semanticscience.org/resource/SIO_000008"
|
144
|
+
qry = "SELECT DISTINCT ?p ?o where { <#{sol[:o].to_s}> ?p ?o}"
|
145
|
+
SPARQL.execute(qry,repo).select{|sol| sol[:p].to_s == "http://semanticscience.org/resource/SIO_000300"}.first[:o]
|
146
|
+
elsif sol[:p].to_s != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
147
|
+
sol[:o]
|
148
|
+
end
|
149
|
+
}.reject{|sol| sol == nil}
|
150
|
+
end
|
151
|
+
|
152
|
+
def official_symbol(hugo_symbol)
|
153
|
+
qry = <<-EOF
|
154
|
+
|
155
|
+
SELECT distinct ?official where {
|
156
|
+
{?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
|
157
|
+
UNION
|
158
|
+
{?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}
|
159
|
+
|
160
|
+
?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
|
161
|
+
}
|
162
|
+
|
163
|
+
EOF
|
164
|
+
|
165
|
+
sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
|
166
|
+
sparql.query(qry).map(&:official).first.to_s
|
167
|
+
end
|
168
|
+
|
169
|
+
def gene_length(hugo_symbol)
|
170
|
+
hugo_symbol = official_symbol(hugo_symbol.split('/').last)
|
171
|
+
qry = IO.read('resources/queries/hugo_to_ensembl.rq').gsub('%{hugo_symbol}',hugo_symbol)
|
172
|
+
sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
|
173
|
+
sol = sparql.query(qry)
|
174
|
+
|
175
|
+
if sol.size == 0
|
176
|
+
raise "No Ensembl entry found for #{hugo_symbol}"
|
177
|
+
else
|
178
|
+
ensemble_id = sol.map(&:ensembl).first.to_s.split(':').last
|
179
|
+
end
|
180
|
+
|
181
|
+
url = URI.parse('http://beta.rest.ensembl.org/')
|
182
|
+
http = Net::HTTP.new(url.host, url.port)
|
183
|
+
request = Net::HTTP::Get.new('/lookup/id/' + ensemble_id + '?format=full', {'Content-Type' => 'application/json'})
|
184
|
+
response = http.request(request)
|
185
|
+
|
186
|
+
if response.code != "200"
|
187
|
+
raise "Invalid response: #{response.code}"
|
188
|
+
else
|
189
|
+
js = JSON.parse(response.body)
|
190
|
+
js['end'] - js['start']
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def derive_gene_lengths
|
195
|
+
|
196
|
+
end
|
197
|
+
|
198
|
+
def patient_info(id,repo)
|
199
|
+
symbols = Array(to_por(select_property(repo,"Hugo_Symbol",patient: id)))
|
200
|
+
# patient_id = select_property(repo,"patient_id",patient: id).to_s
|
201
|
+
patient = {patient_id: id, mutation_count: symbols.size, mutations:[]}
|
202
|
+
|
203
|
+
symbols.each{|sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)}}
|
204
|
+
patient
|
205
|
+
end
|
206
|
+
|
207
|
+
def gene_info(hugo_symbol,repo)
|
208
|
+
qry = IO.read('resources/queries/patients_with_mutation.rq').gsub('%{hugo_symbol}',hugo_symbol)
|
209
|
+
sols = SPARQL.execute(qry,repo)
|
210
|
+
patient_count = sols.size
|
211
|
+
{mutations: patient_count, gene_length: gene_length(hugo_symbol), patients: sols.map(&:patient_id).map(&:to_s)}
|
212
|
+
|
213
|
+
# symbols = select_property(repo,"Hugo_Symbol",id).map(&:to_s)
|
214
|
+
# patient_id = select_property(repo,"patient_id",id).first.to_s
|
215
|
+
# patient = {patient_id: patient_id, mutation_count: symbols.size, mutations:[]}
|
216
|
+
|
217
|
+
# symbols.each{|sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)}}
|
218
|
+
# patient
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
describe MafQuery do
|
225
|
+
before(:all) do
|
226
|
+
@maf = MafQuery.new
|
227
|
+
@repo = @maf.generate_data
|
228
|
+
end
|
229
|
+
|
230
|
+
describe "query genes" do
|
231
|
+
it { @maf.select_patient_genes(@repo,"BH-A0HP").size.should > 0 }
|
232
|
+
end
|
233
|
+
|
234
|
+
describe "query number of entries" do
|
235
|
+
it { @maf.select_patient_count(@repo,"BH-A0HP").should > 0 }
|
236
|
+
end
|
237
|
+
|
238
|
+
|
239
|
+
describe ".patients" do
|
240
|
+
it "retrieves a list of patients" do
|
241
|
+
@maf.to_por(@maf.patients(@repo)).first.should == "E9-A22B"
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
describe ".select_property" do
|
246
|
+
it { @maf.to_por(@maf.select_property(@repo,"Hugo_Symbol", patient: "BH-A0HP")).should == "http://identifiers.org/hgnc.symbol/A1CF" }
|
247
|
+
it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
|
248
|
+
it { @maf.select_property(@repo,"Center",patient: "BH-A0HP")[:Center].to_s.should == "genome.wustl.edu" }
|
249
|
+
it { @maf.select_property(@repo,"NCBI_Build",patient: "BH-A0HP")[:NCBI_Build].to_i.should == 37 }
|
250
|
+
|
251
|
+
context "extra parsed properties" do
|
252
|
+
it { @maf.select_property(@repo,"sample_id",patient: "BH-A0HP")[:sample_id].should == "01A-12D-A099-09" }
|
253
|
+
it { @maf.select_property(@repo,"patient_id",patient: "BH-A0HP")[:patient_id].should == "BH-A0HP" }
|
254
|
+
end
|
255
|
+
|
256
|
+
context "multiple restrictions" do
|
257
|
+
it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP", :Chromosome => 10)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
|
258
|
+
it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP", :Chromosome => 2).should == [] }
|
259
|
+
end
|
260
|
+
|
261
|
+
context "multiple selections" do
|
262
|
+
it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
|
263
|
+
it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: "BH-A0HP")[:Hugo_Symbol].to_s.should == 'http://identifiers.org/hgnc.symbol/A1CF' }
|
264
|
+
|
265
|
+
end
|
266
|
+
|
267
|
+
context "non-existant properties" do
|
268
|
+
it { @maf.select_property(@repo,"Chunkiness",patient: "BH-A0HP").should == [] }
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
context "remote service calls", no_travis: true do
|
273
|
+
describe ".gene_length" do
|
274
|
+
it { @maf.gene_length('A2BP1').should == 1694245 }
|
275
|
+
end
|
276
|
+
|
277
|
+
# describe ".official_symbol" do
|
278
|
+
# it { @maf.official_symbol('A2BP1').should == 'RBFOX1' }
|
279
|
+
# end
|
280
|
+
|
281
|
+
describe ".gene_info" do
|
282
|
+
it 'collects the number of mutations and gene lengths for each mutation' do
|
283
|
+
gene = @maf.gene_info('A1BG',@repo)
|
284
|
+
gene[:mutations].should == 2
|
285
|
+
gene[:gene_length].should == 8321
|
286
|
+
gene[:patients].first.should == "E9-A22B"
|
287
|
+
end
|
288
|
+
end
|
289
|
+
|
290
|
+
describe ".patient_info" do
|
291
|
+
it 'collects the number of patients with a mutation in a gene and its length' do
|
292
|
+
patient = @maf.patient_info('BH-A0HP',@repo)
|
293
|
+
patient[:mutation_count].should == 1
|
294
|
+
patient[:mutations].first[:length].should == 79113
|
295
|
+
patient[:mutations].first[:symbol].should == 'http://identifiers.org/hgnc.symbol/A1CF'
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
class QueryScript
|
302
|
+
def initialize(repo=nil)
|
303
|
+
@__maf = MafQuery.new
|
304
|
+
unless repo
|
305
|
+
@__repo = @__maf.generate_data
|
306
|
+
else
|
307
|
+
@__repo = repo
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
def select(operation,*args)
|
312
|
+
if @__maf.methods.include?(:"select_#{operation}")
|
313
|
+
@__maf.to_por(@__maf.send(:"select_#{operation}",@__repo,*args))
|
314
|
+
else
|
315
|
+
@__maf.to_por(@__maf.select_property(@__repo,operation,*args))
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
def gene_length(gene)
|
320
|
+
@__maf.to_por(@__maf.gene_length(gene))
|
321
|
+
end
|
322
|
+
|
323
|
+
def report_for(type, id)
|
324
|
+
@__maf.send(:"#{type}_info",id, @__repo)
|
325
|
+
end
|
326
|
+
end
|
327
|
+
|
328
|
+
describe QueryScript do
|
329
|
+
describe ".select" do
|
330
|
+
before(:all){
|
331
|
+
@ev = QueryScript.new
|
332
|
+
}
|
333
|
+
|
334
|
+
it { @ev.select('patient_count', "BH-A0HP").should > 0 }
|
335
|
+
|
336
|
+
context "with instance_eval" do
|
337
|
+
it { @ev.instance_eval("select 'patient_count', patient: 'BH-A0HP'").should > 0 }
|
338
|
+
it { @ev.instance_eval("select 'Hugo_Symbol', patient: 'BH-A0HP'").should == 'http://identifiers.org/hgnc.symbol/A1CF' }
|
339
|
+
it { @ev.instance_eval("select 'Chromosome', patient: 'BH-A0HP'").is_a?(Fixnum).should be true }
|
340
|
+
it { @ev.instance_eval("report_for 'patient', 'BH-A0HP'").is_a?(Hash).should be true }
|
341
|
+
end
|
342
|
+
end
|
343
|
+
end
|