bio-publisci 0.0.8 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +10 -0
  3. data/Rakefile +1 -1
  4. data/bin/bio-publisci-server +50 -0
  5. data/features/reader_steps.rb +1 -1
  6. data/lib/bio-publisci.rb +11 -2
  7. data/lib/bio-publisci/datacube_model.rb +92 -88
  8. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
  9. data/lib/bio-publisci/dataset/data_cube.rb +78 -44
  10. data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
  11. data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
  12. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
  13. data/lib/bio-publisci/output.rb +1 -1
  14. data/lib/bio-publisci/parser.rb +130 -12
  15. data/lib/bio-publisci/post_processor.rb +95 -0
  16. data/lib/bio-publisci/query/query_helper.rb +13 -8
  17. data/lib/bio-publisci/readers/arff.rb +1 -1
  18. data/lib/bio-publisci/readers/base.rb +57 -0
  19. data/lib/bio-publisci/readers/csv.rb +2 -5
  20. data/lib/bio-publisci/readers/dataframe.rb +2 -2
  21. data/lib/bio-publisci/readers/maf.rb +199 -0
  22. data/lib/bio-publisci/readers/r_cross.rb +6 -10
  23. data/lib/bio-publisci/readers/r_matrix.rb +1 -1
  24. data/lib/bio-publisci/writers/base.rb +16 -0
  25. data/lib/bio-publisci/writers/json.rb +18 -0
  26. data/resources/maf_example.maf +10 -0
  27. data/resources/maf_rdf.ttl +1173 -0
  28. data/resources/primer.ttl +38 -0
  29. data/resources/queries/gene.rq +16 -0
  30. data/resources/queries/hugo_to_ensembl.rq +7 -0
  31. data/resources/queries/maf_column.rq +26 -0
  32. data/resources/queries/patient.rq +11 -0
  33. data/resources/queries/patient_list.rq +11 -0
  34. data/resources/queries/patients_with_mutation.rq +18 -0
  35. data/scripts/get_gene_lengths.rb +50 -0
  36. data/scripts/islet_mlratio.rb +1 -1
  37. data/scripts/scan_islet.rb +1 -1
  38. data/scripts/update_reference.rb +8 -3
  39. data/server/helpers.rb +215 -0
  40. data/server/public/src-min-noconflict/LICENSE +24 -0
  41. data/server/public/src-min-noconflict/ace.js +11 -0
  42. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  43. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  44. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  45. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  46. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  47. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  48. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  49. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  50. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  51. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  52. data/server/public/src-min-noconflict/ext-split.js +1 -0
  53. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  54. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  55. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  56. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  57. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  58. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  59. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  60. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  61. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  62. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  63. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  64. data/server/public/src-min-noconflict/worker-css.js +1 -0
  65. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  66. data/server/public/src-min-noconflict/worker-json.js +1 -0
  67. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  68. data/server/public/src-min-noconflict/worker-php.js +1 -0
  69. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  70. data/server/routes.rb +123 -0
  71. data/server/views/dsl.haml +65 -0
  72. data/server/views/dump.haml +3 -0
  73. data/server/views/import.haml +35 -0
  74. data/server/views/new_repository.haml +25 -0
  75. data/server/views/query.haml +28 -0
  76. data/server/views/repository.haml +25 -0
  77. data/spec/ORM/data_cube_orm_spec.rb +1 -0
  78. data/spec/bnode_spec.rb +66 -0
  79. data/spec/data_cube_spec.rb +66 -63
  80. data/spec/dataset_for_spec.rb +36 -16
  81. data/spec/dsl_spec.rb +41 -0
  82. data/spec/generators/csv_spec.rb +3 -3
  83. data/spec/generators/dataframe_spec.rb +2 -2
  84. data/spec/generators/maf_spec.rb +40 -0
  85. data/spec/generators/r_cross_spec.rb +2 -2
  86. data/spec/generators/r_matrix_spec.rb +2 -2
  87. data/spec/length_lookup_spec.rb +0 -0
  88. data/spec/maf_query_spec.rb +343 -0
  89. data/spec/resource/example.Rhistory +1 -1
  90. data/spec/turtle/bacon +9 -9
  91. data/spec/turtle/reference +43 -43
  92. data/spec/turtle/weather +10 -10
  93. data/spec/writer_spec.rb +16 -2
  94. metadata +212 -61
@@ -52,12 +52,14 @@ describe PubliSci::Dataset::DataCube do
52
52
 
53
53
  it "skips observations with missing values by default" do
54
54
  turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
55
- turtle_string[/.*obsmissingbacon.*\n/].should be nil
56
- end
57
-
58
- it "includes observations with missing values if flag is set" do
59
- turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
60
- turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
55
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
56
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
57
+ end
58
+
59
+ it "includes observations with missing values if flag is set" do
60
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
61
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
62
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
61
63
  end
62
64
 
63
65
  end
@@ -102,65 +104,66 @@ describe PubliSci::Dataset::DataCube do
102
104
  observations.is_a?(Array).should == true
103
105
  observations.first.is_a?(String).should == true
104
106
  end
105
- end
106
107
 
107
- context "under official integrity constraints" do
108
- before(:all) do
109
- @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
110
- @checks = {}
111
- Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
112
- if file.split('.').last == 'rq'
113
- @checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
114
- end
115
- end
116
- end
117
-
118
- it 'obeys IC-1, has a unique dataset for each observation' do
119
- SPARQL.execute(@checks['1'], @graph).first.should be_nil
120
- end
121
-
122
- it 'obeys IC-2, has a unique data structure definition of each dataset' do
123
- SPARQL.execute(@checks['2'], @graph).first.should be_nil
124
- end
125
-
126
- it 'obeys IC-3, has a measure property specified for each dataset' do
127
- SPARQL.execute(@checks['3'], @graph).first.should be_nil
128
- end
129
-
130
- it 'obeys IC-4, specifies a range for all dimensions' do
131
- SPARQL.execute(@checks['4'], @graph).first.should be_nil
132
- end
133
-
134
- it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
135
- SPARQL.execute(@checks['5'], @graph).first.should be_nil
136
- end
137
-
138
- it 'obeys IC-11, has a value for each dimension in every observation' do
139
- SPARQL.execute(@checks['11'], @graph).first.should be_nil
140
- end
141
-
142
- ## currently locks up. possible bug in SPARQL gem parsing?
143
- ## works fine as a raw query
144
- # it 'obeys IC-12, has do duplicate observations' do
145
- # SPARQL.execute(@checks['12'], @graph).first.should be_nil
146
- # end
147
-
148
- it 'obeys IC-14, has a value for each measure in every observation' do
149
- SPARQL.execute(@checks['14'], @graph).first.should be_nil
150
- end
151
-
152
- it 'obeys IC-19, all codes for each codeList are included' do
153
- SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
154
- ## second query for IC-19 uses property paths that aren't as easy to
155
- ## convert to sparql 1.0, so for now I've left it out
156
- # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
157
- end
108
+ it "coerces single values into arrays" do
109
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
110
+ observations = @generator.observations(@measures, @dimensions, @codes, newdata, @labels[0], "bacon")
111
+ observations.is_a?(Array).should == true
112
+ observations.first.is_a?(String).should == true
113
+ end
158
114
  end
159
115
 
160
116
 
161
- it "can set dimensions vs measures via hash" do
162
-
163
- end
164
-
165
-
117
+ context "under official integrity constraints" do
118
+ before(:all) do
119
+ @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
120
+ @checks = {}
121
+ Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
122
+ if file.split('.').last == 'rq'
123
+ @checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
124
+ end
125
+ end
126
+ end
127
+
128
+ it 'obeys IC-1, has a unique dataset for each observation' do
129
+ SPARQL.execute(@checks['1'], @graph).first.should be_nil
130
+ end
131
+
132
+ it 'obeys IC-2, has a unique data structure definition of each dataset' do
133
+ SPARQL.execute(@checks['2'], @graph).first.should be_nil
134
+ end
135
+
136
+ it 'obeys IC-3, has a measure property specified for each dataset' do
137
+ SPARQL.execute(@checks['3'], @graph).first.should be_nil
138
+ end
139
+
140
+ it 'obeys IC-4, specifies a range for all dimensions' do
141
+ SPARQL.execute(@checks['4'], @graph).first.should be_nil
142
+ end
143
+
144
+ it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
145
+ SPARQL.execute(@checks['5'], @graph).first.should be_nil
146
+ end
147
+
148
+ it 'obeys IC-11, has a value for each dimension in every observation' do
149
+ SPARQL.execute(@checks['11'], @graph).first.should be_nil
150
+ end
151
+
152
+ ## currently locks up. possible bug in SPARQL gem parsing?
153
+ ## works fine as a raw query
154
+ # it 'obeys IC-12, has do duplicate observations' do
155
+ # SPARQL.execute(@checks['12'], @graph).first.should be_nil
156
+ # end
157
+
158
+ it 'obeys IC-14, has a value for each measure in every observation' do
159
+ SPARQL.execute(@checks['14'], @graph).first.should be_nil
160
+ end
161
+
162
+ it 'obeys IC-19, all codes for each codeList are included' do
163
+ SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
164
+ ## second query for IC-19 uses property paths that aren't as easy to
165
+ ## convert to sparql 1.0, so for now I've left it out
166
+ # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
167
+ end
168
+ end
166
169
  end
@@ -1,6 +1,40 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
3
  describe PubliSci::Dataset do
4
+ it "should use sio:has_value for unknown string types" do
5
+ pending("pending refactor dataset_for to handle raw remote files better")
6
+ turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
7
+ (turtle_string =~ /hasValue/).should_not be nil
8
+ # open('ttl.ttl','w'){|f| f.write turtle_string}
9
+ repo = RDF::Repository.new
10
+
11
+ f = Tempfile.new(['repo','.ttl'])
12
+ f.write(turtle_string)
13
+ f.close
14
+ repo.load(f.path, :format => :ttl)
15
+ f.unlink
16
+
17
+ repo.size.should > 0
18
+ end
19
+
20
+ it "can convert arff files" do
21
+ turtle_string = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
22
+ turtle_string.should == IO.read('spec/turtle/weather')
23
+ end
24
+
25
+ describe ".register_reader" do
26
+ it "can register readers to be used by Dataset.for" do
27
+ PubliSci::Dataset.reader_registry.clear
28
+ expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
29
+ PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
30
+ file = PubliSci::Dataset.for('resources/maf_example.maf')
31
+ str = IO.read(file)
32
+ File.delete(file.path)
33
+ str.size.should > 0
34
+ (str =~ /qb:Observation/).should_not be nil
35
+ end
36
+ end
37
+
4
38
  context 'with a csv file' do
5
39
  before(:all) do
6
40
  @file = File.dirname(__FILE__) + '/csv/bacon.csv'
@@ -11,21 +45,6 @@ describe PubliSci::Dataset do
11
45
  (turtle_string =~ /qb:Observation/).should_not be nil
12
46
  end
13
47
 
14
- it "should use sio:has_value for unknown string types" do
15
- turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
16
- (turtle_string =~ /hasValue/).should_not be nil
17
- # open('ttl.ttl','w'){|f| f.write turtle_string}
18
- repo = RDF::Repository.new
19
-
20
- f = Tempfile.new(['repo','.ttl'])
21
- f.write(turtle_string)
22
- f.close
23
- repo.load(f.path, :format => :ttl)
24
- f.unlink
25
-
26
- repo.size.should > 0
27
- end
28
-
29
48
  it "will download remote files" do
30
49
  turtle_string = PubliSci::Dataset.for('https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv',false)
31
50
  (turtle_string =~ /prop:pricerange/).should_not be nil
@@ -33,7 +52,7 @@ describe PubliSci::Dataset do
33
52
  end
34
53
 
35
54
  it "will request user input if not provided" do
36
- gen = PubliSci::Reader::CSV.new
55
+ gen = PubliSci::Readers::CSV.new
37
56
  gen.stub(:gets).and_return('pricerange,producer')
38
57
  gen.stub(:puts)
39
58
  turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
@@ -54,4 +73,5 @@ describe PubliSci::Dataset do
54
73
  (turtle_string =~ /prop:producer/).should_not be nil
55
74
  end
56
75
  end
76
+
57
77
  end
@@ -9,6 +9,45 @@ describe PubliSci::DSL do
9
9
  PubliSci::Dataset.registry.clear
10
10
  end
11
11
 
12
+ context "maf files" do
13
+ describe "set options" do
14
+ before { PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF) }
15
+ it "can change output type" do
16
+
17
+ dat = data do
18
+ object 'resources/maf_example.maf'
19
+ option :output, :print
20
+ end
21
+
22
+ str = generate_n3
23
+ str[/a qb:Observation/].should_not == nil
24
+ end
25
+
26
+ it "can output to repository" do
27
+ dat = data do
28
+ object 'resources/maf_example.maf'
29
+ option :output, :print
30
+ end
31
+
32
+ repo = to_repository
33
+ repo.is_a?(RDF::Repository).should be true
34
+ repo.size.should > 0
35
+
36
+ qry = <<-EOF
37
+ SELECT ?observation where {
38
+ ?observation a <http://purl.org/linked-data/cube#Observation>;
39
+ <http://onto.strinz.me/properties/Hugo_Symbol> ?node.
40
+
41
+ }
42
+
43
+ EOF
44
+
45
+ sparql = SPARQL::Client.new(repo)
46
+ sparql.query(qry).size.should > 0
47
+ end
48
+ end
49
+ end
50
+
12
51
  it "can generate dataset, metadata, and provenance when given a script" do
13
52
 
14
53
  dat = data do
@@ -49,6 +88,8 @@ describe PubliSci::DSL do
49
88
  str[/rdfs:label "\d"/].should == nil
50
89
  end
51
90
 
91
+
92
+
52
93
  it "can output to in-memory repository" do
53
94
  dat = data do
54
95
  object 'spec/csv/bacon.csv'
@@ -5,7 +5,7 @@ require_relative '../../lib/bio-publisci.rb'
5
5
  # require 'rdf/turtle'
6
6
  require 'tempfile'
7
7
 
8
- describe PubliSci::Reader::CSV do
8
+ describe PubliSci::Readers::CSV do
9
9
 
10
10
  def create_graph(turtle_string)
11
11
  f = Tempfile.new('graph')
@@ -17,7 +17,7 @@ describe PubliSci::Reader::CSV do
17
17
  end
18
18
 
19
19
  before(:each) do
20
- @generator = PubliSci::Reader::CSV.new
20
+ @generator = PubliSci::Readers::CSV.new
21
21
  end
22
22
 
23
23
  context 'with reference CSV' do
@@ -35,7 +35,7 @@ describe PubliSci::Reader::CSV do
35
35
 
36
36
  dims = RDF::Query.execute(graph){ pattern [:dataset, qb.dimension, :dimension] }
37
37
  dims.size.should == 1
38
- dims.first[:dimension].to_s.should == "http://www.rqtl.org/dc/properties/producer"
38
+ dims.first[:dimension].to_s.should == "http://onto.strinz.me/properties/producer"
39
39
 
40
40
  measures = RDF::Query.execute(graph){ pattern [:dataset, qb.measure, :measure] }
41
41
  measures.map{|s| s[:measure].to_s.split('/').last}.should == ["pricerange", "chunkiness", "deliciousness"]
@@ -1,6 +1,6 @@
1
1
  require_relative '../../lib/bio-publisci.rb'
2
2
 
3
- describe PubliSci::Reader::Dataframe do
3
+ describe PubliSci::Readers::Dataframe do
4
4
 
5
5
  def create_graph(turtle_string)
6
6
  f = Tempfile.new('graph')
@@ -14,7 +14,7 @@ describe PubliSci::Reader::Dataframe do
14
14
  context "with r/qtl dataframe", no_travis: true do
15
15
  before(:all) do
16
16
  @r = Rserve::Connection.new
17
- @generator = PubliSci::Reader::Dataframe.new
17
+ @generator = PubliSci::Readers::Dataframe.new
18
18
  @r.eval <<-EOF
19
19
  library(qtl)
20
20
  data(listeria)
@@ -0,0 +1,40 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/csv.rb'
3
+ require_relative '../../lib/bio-publisci.rb'
4
+
5
+ # require 'rdf/turtle'
6
+ require 'tempfile'
7
+
8
+ describe PubliSci::Readers::MAF do
9
+ before(:each) do
10
+ @generator = PubliSci::Readers::MAF.new
11
+ @in_file = 'resources/maf_example.maf'
12
+ end
13
+
14
+ describe ".generate_n3" do
15
+ def is_cube(str)
16
+ str[/a qb:Observation/].should_not be nil
17
+ str[/a rdf:Property, qb:DimensionProperty/].should_not be nil
18
+ str[/a rdf:Property, qb:MeasureProperty/].should_not be nil
19
+ str[/a qb:ComponentSpecification/].should_not be nil
20
+ end
21
+
22
+ context "print output" do
23
+ before { @str = @generator.generate_n3(@in_file, {output: :print})}
24
+ it { is_cube(@str) }
25
+ end
26
+
27
+ context "file output" do
28
+ before {
29
+ f = Tempfile.new('graph')
30
+ f.close
31
+ @generator.generate_n3(@in_file,{output: :file, output_base: f.path})
32
+ @str = IO.read(f.path+'.ttl')
33
+ open('resources/maf_rdf.ttl','w'){|f| f.write @str}
34
+ f.unlink
35
+ }
36
+
37
+ it { is_cube(@str) }
38
+ end
39
+ end
40
+ end
@@ -2,7 +2,7 @@ require_relative '../../lib/bio-publisci.rb'
2
2
 
3
3
  require 'tempfile'
4
4
 
5
- describe PubliSci::Reader::RCross do
5
+ describe PubliSci::Readers::RCross do
6
6
 
7
7
  def create_graph(turtle_string)
8
8
  f = Tempfile.new('graph')
@@ -16,7 +16,7 @@ describe PubliSci::Reader::RCross do
16
16
  context "with reduced listeria cross", no_travis: true do
17
17
  before(:all) do
18
18
  @r = Rserve::Connection.new
19
- @generator = PubliSci::Reader::RCross.new
19
+ @generator = PubliSci::Readers::RCross.new
20
20
  @r.eval <<-EOF
21
21
  library(qtl)
22
22
  data(listeria)
@@ -6,7 +6,7 @@ require_relative '../../lib/bio-publisci.rb'
6
6
 
7
7
  require 'tempfile'
8
8
 
9
- describe PubliSci::Reader::RMatrix do
9
+ describe PubliSci::Readers::RMatrix do
10
10
 
11
11
  def create_graph(turtle_string)
12
12
  f = Tempfile.new('graph')
@@ -18,7 +18,7 @@ describe PubliSci::Reader::RMatrix do
18
18
  end
19
19
 
20
20
  before(:each) do
21
- @generator = PubliSci::Reader::RMatrix.new
21
+ @generator = PubliSci::Readers::RMatrix.new
22
22
  @connection = Rserve::Connection.new
23
23
  end
24
24
 
File without changes
@@ -0,0 +1,343 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ #sparql = SPARQL::Client.new("#{repo.uri}/sparql/").query(qry)
4
+
5
+ class MafQuery
6
+ RESTRICTIONS = {
7
+ patient: '<http://onto.strinz.me/properties/patient_id>',
8
+ sample: '<http://onto.strinz.me/properties/sample_id>',
9
+ gene: '<http://onto.strinz.me/properties/Hugo_Symbol>',
10
+ }
11
+
12
+ def to_por(solution)
13
+ if solution.is_a?(Fixnum) or solution.is_a?(String) or solution.is_a?(Symbol)
14
+ solution
15
+ elsif solution.is_a? RDF::Query::Solutions
16
+ solution.map{|sol|
17
+ if sol.bindings.size == 1
18
+ to_por(sol.bindings.first.last)
19
+ else
20
+ Hash(solution.bindings.map{|bind,result| [bind,to_por(result)]})
21
+ end
22
+ }
23
+ elsif solution.is_a? RDF::Query::Solution
24
+ if solution.bindings.size == 1
25
+ to_por(solution.bindings.first.last)
26
+ else
27
+ solution.bindings.map{|bind,result| [bind,to_por(result)] }
28
+ end
29
+ elsif solution.is_a? Array
30
+ if solution.size == 1
31
+ to_por(solution.first)
32
+ else
33
+ solution.map{|sol| to_por(sol)}
34
+ end
35
+ else
36
+ if solution.is_a? RDF::Literal
37
+ solution.object
38
+ elsif solution.is_a? RDF::URI
39
+ solution.to_s
40
+ else
41
+ puts "don't recognzize #{solution.class}"
42
+ solution.to_s
43
+ end
44
+ end
45
+ end
46
+
47
+ def generate_data
48
+ generator = PubliSci::Readers::MAF.new
49
+ in_file = 'resources/maf_example.maf'
50
+ f = Tempfile.new('graph')
51
+ f.close
52
+ generator.generate_n3(in_file, {output: :file, output_base: f.path})
53
+ repo = RDF::Repository.load(f.path+'.ttl')
54
+ File.delete(f.path+'.ttl')
55
+ f.unlink
56
+ repo
57
+ end
58
+
59
+ def select_patient_count(repo,patient_id="A8-A08G")
60
+ qry = IO.read('resources/queries/patient.rq')
61
+ qry = qry.gsub('%{patient}',patient_id)
62
+ SPARQL.execute(qry,repo).first[:barcodes]
63
+ end
64
+
65
+ def patients(repo)
66
+ qry = IO.read('resources/queries/patient_list.rq')
67
+ SPARQL.execute(qry,repo) #.map(&:id).map(&:to_s)
68
+ end
69
+
70
+ def select_patient_genes(repo,patient_id="A8-A08G")
71
+ qry = IO.read('resources/queries/gene.rq')
72
+ qry = qry.gsub('%{patient}',patient_id)
73
+ SPARQL.execute(qry,repo)
74
+ end
75
+
76
+ def select_property(repo,property=["Hugo_Symbol"],restrictions={})
77
+ # qry = IO.read('resources/queries/maf_column.rq').gsub('%{patient}',patient_id).gsub('%{column}',property)
78
+ property = Array(property)
79
+ selects = property
80
+ property = property.map{|prop|
81
+ RESTRICTIONS[prop.to_sym] || "<http://onto.strinz.me/properties/#{prop}>"
82
+ }
83
+
84
+ targets = ""
85
+ property.each_with_index{|p,i|
86
+ targets << "\n #{p} ?#{selects[i]} ;"
87
+ }
88
+
89
+ str = ""
90
+ restrictions.each{|restrict,value|
91
+ prop = RESTRICTIONS[restrict.to_sym] || "<http://onto.strinz.me/properties/#{restrict}>"
92
+ if value.is_a? String
93
+ if RDF::Resource(value).valid?
94
+ if(value[/http:\/\//])
95
+ value = RDF::Resource(value).to_base
96
+ end
97
+ else
98
+ value = '"' + value + '"'
99
+ end
100
+ end
101
+ str << "\n #{prop} #{value} ;"
102
+ }
103
+
104
+
105
+ qry = <<-EOF
106
+ PREFIX qb: <http://purl.org/linked-data/cube#>
107
+ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
108
+ PREFIX sio: <http://semanticscience.org/resource/>
109
+
110
+ SELECT DISTINCT ?#{selects.join(" ?")} WHERE {
111
+ ?obs a qb:Observation;
112
+ #{str}
113
+ #{targets}
114
+ .
115
+ }
116
+ EOF
117
+
118
+ results = SPARQL.execute(qry,repo)
119
+ # results = results.map{ |solution|
120
+ # solution.bindings.map{ |bind,result| [bind, result]}
121
+
122
+ # # .map(&:column).map{|val|
123
+ # # if val.is_a?(RDF::URI) and val.to_s["node"]
124
+ # # node_value(repo,val)
125
+ # # else
126
+ # # val
127
+ # # end
128
+
129
+ # }.flatten
130
+
131
+ if results.size == 1
132
+ results.first
133
+ else
134
+ results
135
+ end
136
+ end
137
+
138
+ def node_value(repo,uri)
139
+ qry = "SELECT DISTINCT ?p ?o where { <#{uri.to_s}> ?p ?o}"
140
+ SPARQL.execute(qry,repo).map{|sol|
141
+ if sol[:p].to_s == "http://semanticscience.org/resource/SIO_000300"
142
+ sol[:o]
143
+ elsif sol[:p].to_s == "http://semanticscience.org/resource/SIO_000008"
144
+ qry = "SELECT DISTINCT ?p ?o where { <#{sol[:o].to_s}> ?p ?o}"
145
+ SPARQL.execute(qry,repo).select{|sol| sol[:p].to_s == "http://semanticscience.org/resource/SIO_000300"}.first[:o]
146
+ elsif sol[:p].to_s != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
147
+ sol[:o]
148
+ end
149
+ }.reject{|sol| sol == nil}
150
+ end
151
+
152
+ def official_symbol(hugo_symbol)
153
+ qry = <<-EOF
154
+
155
+ SELECT distinct ?official where {
156
+ {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
157
+ UNION
158
+ {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}
159
+
160
+ ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
161
+ }
162
+
163
+ EOF
164
+
165
+ sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
166
+ sparql.query(qry).map(&:official).first.to_s
167
+ end
168
+
169
+ def gene_length(hugo_symbol)
170
+ hugo_symbol = official_symbol(hugo_symbol.split('/').last)
171
+ qry = IO.read('resources/queries/hugo_to_ensembl.rq').gsub('%{hugo_symbol}',hugo_symbol)
172
+ sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
173
+ sol = sparql.query(qry)
174
+
175
+ if sol.size == 0
176
+ raise "No Ensembl entry found for #{hugo_symbol}"
177
+ else
178
+ ensemble_id = sol.map(&:ensembl).first.to_s.split(':').last
179
+ end
180
+
181
+ url = URI.parse('http://beta.rest.ensembl.org/')
182
+ http = Net::HTTP.new(url.host, url.port)
183
+ request = Net::HTTP::Get.new('/lookup/id/' + ensemble_id + '?format=full', {'Content-Type' => 'application/json'})
184
+ response = http.request(request)
185
+
186
+ if response.code != "200"
187
+ raise "Invalid response: #{response.code}"
188
+ else
189
+ js = JSON.parse(response.body)
190
+ js['end'] - js['start']
191
+ end
192
+ end
193
+
194
+ def derive_gene_lengths
195
+
196
+ end
197
+
198
+ def patient_info(id,repo)
199
+ symbols = Array(to_por(select_property(repo,"Hugo_Symbol",patient: id)))
200
+ # patient_id = select_property(repo,"patient_id",patient: id).to_s
201
+ patient = {patient_id: id, mutation_count: symbols.size, mutations:[]}
202
+
203
+ symbols.each{|sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)}}
204
+ patient
205
+ end
206
+
207
+ def gene_info(hugo_symbol,repo)
208
+ qry = IO.read('resources/queries/patients_with_mutation.rq').gsub('%{hugo_symbol}',hugo_symbol)
209
+ sols = SPARQL.execute(qry,repo)
210
+ patient_count = sols.size
211
+ {mutations: patient_count, gene_length: gene_length(hugo_symbol), patients: sols.map(&:patient_id).map(&:to_s)}
212
+
213
+ # symbols = select_property(repo,"Hugo_Symbol",id).map(&:to_s)
214
+ # patient_id = select_property(repo,"patient_id",id).first.to_s
215
+ # patient = {patient_id: patient_id, mutation_count: symbols.size, mutations:[]}
216
+
217
+ # symbols.each{|sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)}}
218
+ # patient
219
+ end
220
+ end
221
+
222
+
223
+
224
+ describe MafQuery do
225
+ before(:all) do
226
+ @maf = MafQuery.new
227
+ @repo = @maf.generate_data
228
+ end
229
+
230
+ describe "query genes" do
231
+ it { @maf.select_patient_genes(@repo,"BH-A0HP").size.should > 0 }
232
+ end
233
+
234
+ describe "query number of entries" do
235
+ it { @maf.select_patient_count(@repo,"BH-A0HP").should > 0 }
236
+ end
237
+
238
+
239
+ describe ".patients" do
240
+ it "retrieves a list of patients" do
241
+ @maf.to_por(@maf.patients(@repo)).first.should == "E9-A22B"
242
+ end
243
+ end
244
+
245
+ describe ".select_property" do
246
+ it { @maf.to_por(@maf.select_property(@repo,"Hugo_Symbol", patient: "BH-A0HP")).should == "http://identifiers.org/hgnc.symbol/A1CF" }
247
+ it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
248
+ it { @maf.select_property(@repo,"Center",patient: "BH-A0HP")[:Center].to_s.should == "genome.wustl.edu" }
249
+ it { @maf.select_property(@repo,"NCBI_Build",patient: "BH-A0HP")[:NCBI_Build].to_i.should == 37 }
250
+
251
+ context "extra parsed properties" do
252
+ it { @maf.select_property(@repo,"sample_id",patient: "BH-A0HP")[:sample_id].should == "01A-12D-A099-09" }
253
+ it { @maf.select_property(@repo,"patient_id",patient: "BH-A0HP")[:patient_id].should == "BH-A0HP" }
254
+ end
255
+
256
+ context "multiple restrictions" do
257
+ it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP", :Chromosome => 10)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
258
+ it { @maf.select_property(@repo,"Entrez_Gene_Id",patient: "BH-A0HP", :Chromosome => 2).should == [] }
259
+ end
260
+
261
+ context "multiple selections" do
262
+ it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
263
+ it { @maf.select_property(@repo,['Hugo_Symbol', 'Entrez_Gene_Id'],patient: "BH-A0HP")[:Hugo_Symbol].to_s.should == 'http://identifiers.org/hgnc.symbol/A1CF' }
264
+
265
+ end
266
+
267
+ context "non-existant properties" do
268
+ it { @maf.select_property(@repo,"Chunkiness",patient: "BH-A0HP").should == [] }
269
+ end
270
+ end
271
+
272
+ context "remote service calls", no_travis: true do
273
+ describe ".gene_length" do
274
+ it { @maf.gene_length('A2BP1').should == 1694245 }
275
+ end
276
+
277
+ # describe ".official_symbol" do
278
+ # it { @maf.official_symbol('A2BP1').should == 'RBFOX1' }
279
+ # end
280
+
281
+ describe ".gene_info" do
282
+ it 'collects the number of mutations and gene lengths for each mutation' do
283
+ gene = @maf.gene_info('A1BG',@repo)
284
+ gene[:mutations].should == 2
285
+ gene[:gene_length].should == 8321
286
+ gene[:patients].first.should == "E9-A22B"
287
+ end
288
+ end
289
+
290
+ describe ".patient_info" do
291
+ it 'collects the number of patients with a mutation in a gene and its length' do
292
+ patient = @maf.patient_info('BH-A0HP',@repo)
293
+ patient[:mutation_count].should == 1
294
+ patient[:mutations].first[:length].should == 79113
295
+ patient[:mutations].first[:symbol].should == 'http://identifiers.org/hgnc.symbol/A1CF'
296
+ end
297
+ end
298
+ end
299
+ end
300
+
301
+ class QueryScript
302
+ def initialize(repo=nil)
303
+ @__maf = MafQuery.new
304
+ unless repo
305
+ @__repo = @__maf.generate_data
306
+ else
307
+ @__repo = repo
308
+ end
309
+ end
310
+
311
+ def select(operation,*args)
312
+ if @__maf.methods.include?(:"select_#{operation}")
313
+ @__maf.to_por(@__maf.send(:"select_#{operation}",@__repo,*args))
314
+ else
315
+ @__maf.to_por(@__maf.select_property(@__repo,operation,*args))
316
+ end
317
+ end
318
+
319
+ def gene_length(gene)
320
+ @__maf.to_por(@__maf.gene_length(gene))
321
+ end
322
+
323
+ def report_for(type, id)
324
+ @__maf.send(:"#{type}_info",id, @__repo)
325
+ end
326
+ end
327
+
328
+ describe QueryScript do
329
+ describe ".select" do
330
+ before(:all){
331
+ @ev = QueryScript.new
332
+ }
333
+
334
+ it { @ev.select('patient_count', "BH-A0HP").should > 0 }
335
+
336
+ context "with instance_eval" do
337
+ it { @ev.instance_eval("select 'patient_count', patient: 'BH-A0HP'").should > 0 }
338
+ it { @ev.instance_eval("select 'Hugo_Symbol', patient: 'BH-A0HP'").should == 'http://identifiers.org/hgnc.symbol/A1CF' }
339
+ it { @ev.instance_eval("select 'Chromosome', patient: 'BH-A0HP'").is_a?(Fixnum).should be true }
340
+ it { @ev.instance_eval("report_for 'patient', 'BH-A0HP'").is_a?(Hash).should be true }
341
+ end
342
+ end
343
+ end