publisci 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
data/spec/dsl_spec.rb ADDED
@@ -0,0 +1,134 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
# Specs for the PubliSci DSL mixin: declaring data, metadata and provenance
# through DSL blocks and emitting the result as Turtle or into a repository.
describe PubliSci::DSL do
  include PubliSci::DSL

  # Empty the Prov, Metadata and Dataset registries before every example.
  before(:each) do
    PubliSci::Prov.registry.clear
    PubliSci::Metadata.registry.clear
    PubliSci::Dataset.registry.clear
  end

  context "maf files" do
    describe "set options" do
      before { PubliSci::Dataset.register_reader('.maf', PubliSci::Readers::MAF) }

      it "can change output type" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        str = generate_n3
        str[/a qb:Observation/].should_not be nil
      end

      it "can output to repository" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        repo = to_repository
        repo.is_a?(RDF::Repository).should be true
        repo.size.should > 0

        qry = <<-EOF
          SELECT ?observation where {
            ?observation a <http://purl.org/linked-data/cube#Observation>;
              <http://onto.strinz.me/properties/Hugo_Symbol> ?node.

          }

        EOF

        sparql = SPARQL::Client.new(repo)
        sparql.query(qry).size.should > 0
      end
    end
  end

  it "can generate dataset, metadata, and provenance when given a script" do
    dat = data do
      object 'spec/csv/bacon.csv'
    end
    met = metadata do
      name "Will"
    end
    prv = provenance do
      entity :a_thing
    end

    met.should_not be nil
    prv.should_not be nil
    dat.should_not be nil

    generate_n3.size.should > 0
  end

  # Previously shared its description with the example above; this one feeds
  # the script as a string to a standalone DSL instance instead of using the mixin.
  it "can generate a dataset when given a script string evaluated in a DSL instance" do
    ev = PubliSci::DSL::Instance.new
    dat = ev.instance_eval <<-EOF
      data do
        object 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
      end
    EOF
    dat.should_not be nil
    ev.generate_n3.size.should > 0
  end

  it "can set generator options" do
    data do
      object 'spec/csv/bacon.csv'
      option :no_labels, true
    end

    str = generate_n3
    str[/rdfs:label "\d"/].should == nil
  end

  it "can output to in-memory repository" do
    data do
      object 'spec/csv/bacon.csv'
    end

    repo = to_repository
    repo.is_a?(RDF::Repository).should be true
    repo.size.should > 0
  end

  # NOTE(review): talks to a 4store endpoint at http://localhost:8080/;
  # the no_travis tag presumably excludes it from CI runs - confirm in spec_helper.
  it "can output to 4store repository", no_travis: true do
    configure do |cfg|
      cfg.repository = :fourstore
    end

    data do
      object 'spec/csv/bacon.csv'
    end

    # Record the store size before loading so growth can be asserted afterwards.
    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end

  it "can output provenance to 4store", no_travis: true do
    ev = PubliSci::Prov::DSL::Instance.new
    str = IO.read('examples/primer-full.prov')
    # Passing the file name makes errors in the script report that path.
    ev.instance_eval(str, 'examples/primer-full.prov')
    ev.instance_eval <<-EOF
      configure do |cfg|
        cfg.repository = :fourstore
      end
    EOF

    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = ev.to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end
end
@@ -0,0 +1,44 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/csv.rb'
3
+ require_relative '../../lib/bio-publisci.rb'
4
+
5
+ # require 'rdf/turtle'
6
+ require 'tempfile'
7
+
8
# Specs for the CSV reader: Turtle generation from spec/csv/bacon.csv.
describe PubliSci::Readers::CSV do

  # Parses a Turtle string into an RDF::Graph by round-tripping it through a
  # temporary file. The temp file is removed even when writing or loading raises.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      # The temp file has no .ttl extension, so the format is given explicitly.
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      f.close!
    end
  end

  # Builds a path relative to the spec directory one level above this file.
  def spec_file(*parts)
    File.join(File.dirname(__FILE__), '..', *parts)
  end

  before(:each) do
    @generator = PubliSci::Readers::CSV.new
  end

  context 'with reference CSV' do
    it "should generate correct output for reference file" do
      options = {dimensions: ["producer", "pricerange"], label_column: 0}
      turtle_string = @generator.generate_n3(spec_file('csv', 'bacon.csv'), 'bacon', options)
      ref = IO.read(spec_file('turtle', 'bacon'))
      turtle_string.should == ref
    end
  end

  it "selects first column as a coded dimension and creates measures from the rest by default" do
    turtle_string = @generator.generate_n3(spec_file('csv', 'bacon.csv'), 'bacon')
    graph = create_graph(turtle_string)
    qb = RDF::Vocabulary.new("http://purl.org/linked-data/cube#")

    dims = RDF::Query.execute(graph) { pattern [:dataset, qb.dimension, :dimension] }
    dims.size.should == 1
    dims.first[:dimension].to_s.should == "http://onto.strinz.me/properties/producer"

    measures = RDF::Query.execute(graph) { pattern [:dataset, qb.measure, :measure] }
    # Compare only the last path segment of each measure URI.
    measures.map { |s| s[:measure].to_s.split('/').last }.should == ["pricerange", "chunkiness", "deliciousness"]
  end

end
@@ -0,0 +1,44 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
# Specs for the R dataframe reader. Every example needs a running Rserve
# instance with the R `qtl` package installed.
describe PubliSci::Readers::Dataframe do

  # Parses a Turtle string into an RDF::Graph via a temporary file, which is
  # removed even when writing or loading raises.
  # NOTE(review): no example in this spec currently calls this helper.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      f.close!
    end
  end

  context "with r/qtl dataframe", no_travis: true do
    # Runs scanone on the listeria data set once and shares the resulting
    # R expression and its Turtle rendering with every example.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::Dataframe.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)
        mr = scanone(listeria,method="mr")
      EOF
      @rexp = @r.eval 'mr'
      @turtle = @generator.generate_n3(@rexp, 'mr')
    end

    it "generates rdf from R dataframe" do
      turtle = @generator.generate_n3(@rexp, 'mr')
      turtle.is_a?(String).should be true
    end

    it "creates correct graph according to reference file" do
      reference = IO.read(File.join(File.dirname(__FILE__), '..', 'turtle', 'reference'))
      @turtle.should eq reference
    end

    # NOTE(review): this example has no expectation; it only checks that
    # passing :row_label does not raise. Consider asserting on the output.
    it "can optionally specify a row label" do
      @generator.generate_n3(@rexp, 'mr', {row_label: "markers"})
    end
  end

end
@@ -0,0 +1,40 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/csv.rb'
3
+ require_relative '../../lib/bio-publisci.rb'
4
+
5
+ # require 'rdf/turtle'
6
+ require 'tempfile'
7
+
8
# Specs for the MAF reader: Turtle generation from resources/maf_example.maf
# in both print and file output modes.
describe PubliSci::Readers::MAF do
  before(:each) do
    @generator = PubliSci::Readers::MAF.new
    @in_file = 'resources/maf_example.maf'
  end

  describe ".generate_n3" do
    # Asserts that the Turtle text contains the four markers of a data cube:
    # an observation, a dimension property, a measure property and a
    # component specification.
    def is_cube(str)
      str[/a qb:Observation/].should_not be nil
      str[/a rdf:Property, qb:DimensionProperty/].should_not be nil
      str[/a rdf:Property, qb:MeasureProperty/].should_not be nil
      str[/a qb:ComponentSpecification/].should_not be nil
    end

    context "print output" do
      before { @str = @generator.generate_n3(@in_file, {output: :print}) }
      it { is_cube(@str) }
    end

    context "file output" do
      before do
        tmp = Tempfile.new('graph')
        tmp.close
        # The generator is given tmp.path as output_base; the result is read
        # back from that path with a .ttl suffix.
        ttl_path = tmp.path + '.ttl'
        begin
          @generator.generate_n3(@in_file, {output: :file, output_base: tmp.path})
          @str = IO.read(ttl_path)
          # Side effect kept from the original: overwrites resources/maf_rdf.ttl
          # with the freshly generated Turtle.
          File.open('resources/maf_rdf.ttl', 'w') { |out| out.write @str }
        ensure
          # Remove both the placeholder temp file and the generated .ttl.
          tmp.unlink
          File.delete(ttl_path) if File.exist?(ttl_path)
        end
      end

      it { is_cube(@str) }
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
+ require 'tempfile'
4
+
5
# Specs for the R/qtl cross reader. Every example needs a running Rserve
# instance with the R `qtl` package installed.
describe PubliSci::Readers::RCross do

  # Parses a Turtle string into an RDF::Graph via a temporary file, which is
  # removed even when writing or loading raises.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      f.close!
    end
  end

  context "with reduced listeria cross", no_travis: true do
    # Builds `liscopy` in the R session: a copy of the listeria cross whose
    # genotype data and phenotypes are cut down to the first two rows.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::RCross.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)

        liscopy = listeria

        for(i in 1:20)
        liscopy$geno[[i]]$data <- as.matrix(liscopy$geno[[i]]$data[1:2,])

        liscopy$pheno <- liscopy$phen[1:2,]
      EOF
    end

    it "generates output to file by default", no_travis: true do
      f = Tempfile.new('cross')
      begin
        @generator.generate_n3(@r, 'liscopy', f.path, {quiet: true})
        # Output is read from <base>_structure.ttl plus the chunk <base>_1.ttl.
        turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_1.ttl")
        graph = create_graph(turtle_string)
        graph.size.should > 0
      ensure
        f.close!
      end
    end

    # Pending: declared without a block. Sketch of the intended body
    # (NOTE(review): the original sketch used @connection, which this spec
    # never sets; @r is the connection created above):
    #   f = Tempfile.new('cross')
    #   turtle_string = @generator.generate_n3(@r,'liscopy',f.path,{quiet: false, output: :string})
    #   graph = create_graph(turtle_string)
    #   graph.size.should > 0
    it "can generate string output", no_travis: true
  end

end
@@ -0,0 +1,44 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/r_matrix.rb'
3
+ # require 'rdf/turtle'
4
+ # require 'rserve'
5
+ require_relative '../../lib/bio-publisci.rb'
6
+
7
+ require 'tempfile'
8
+
9
# Specs for the R matrix reader. Every example needs a running Rserve instance.
describe PubliSci::Readers::RMatrix do

  # Parses a Turtle string into an RDF::Graph via a temporary file, which is
  # removed even when writing or loading raises.
  #
  # @param turtle_string [String] Turtle document to load
  # @return [RDF::Graph] the loaded graph
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    begin
      f.write(turtle_string)
      f.close
      RDF::Graph.load(f.path, :format => :ttl)
    ensure
      f.close!
    end
  end

  before(:each) do
    @generator = PubliSci::Readers::RMatrix.new
    @connection = Rserve::Connection.new
  end

  it "generates a simple output automatically", no_travis: true do
    f = Tempfile.new('matrix')
    begin
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      @generator.generate_n3(@connection, 'mat', f.path, {quiet: true})

      # Output is read from <base>_structure.ttl plus the chunk <base>_0.ttl.
      turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_0.ttl")
      graph = create_graph(turtle_string)
      graph.size.should > 0
    ensure
      f.close!
    end
  end

  it "can generate string output", no_travis: true do
    f = Tempfile.new('matrix')
    begin
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      turtle_string = @generator.generate_n3(@connection, 'mat', f.path, {quiet: true, output: :string})

      graph = create_graph(turtle_string)
      graph.size.should > 0
    ensure
      f.close!
    end
  end

end
File without changes
@@ -0,0 +1,343 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ #sparql = SPARQL::Client.new("#{repo.uri}/sparql/").query(qry)
4
+
5
# Query helper used by the specs below: loads the example MAF file into an RDF
# repository, runs SPARQL queries against it, and looks up gene lengths
# through remote HGNC (bio2rdf) and Ensembl REST services.
class MafQuery
  # Short restriction names mapped to the full property URIs they stand for.
  RESTRICTIONS = {
    patient: '<http://onto.strinz.me/properties/patient_id>',
    sample: '<http://onto.strinz.me/properties/sample_id>',
    gene: '<http://onto.strinz.me/properties/Hugo_Symbol>',
  }.freeze

  # Converts query results into plain Ruby values ("plain old Ruby").
  #
  # * Integer, String and Symbol values are returned unchanged.
  # * RDF::Query::Solutions becomes an Array with one entry per solution: the
  #   bare value when the solution has a single binding, otherwise a Hash of
  #   binding name => converted value.
  # * A single RDF::Query::Solution becomes its bare value (one binding) or an
  #   Array of [name, value] pairs (several bindings).
  # * A one-element Array is unwrapped; longer Arrays are converted element-wise.
  # * RDF::Literal yields its native object, RDF::URI its string form.
  # * Anything else is reported on stdout and converted with #to_s.
  #
  # Solutions is tested before Array, as in the original, because RDF.rb's
  # Solutions is an Array subclass.
  #
  # @param solution [Object] value or result set to convert
  # @return [Object] the converted value
  def to_por(solution)
    case solution
    when Integer, String, Symbol
      solution
    when RDF::Query::Solutions
      solution.map do |sol|
        if sol.bindings.size == 1
          to_por(sol.bindings.first.last)
        else
          # Fixed: use each solution's own bindings (the original read the
          # bindings of the whole result set) and build the Hash with Hash[],
          # because Kernel#Hash raises TypeError for an array of pairs.
          Hash[sol.bindings.map { |bind, result| [bind, to_por(result)] }]
        end
      end
    when RDF::Query::Solution
      if solution.bindings.size == 1
        to_por(solution.bindings.first.last)
      else
        solution.bindings.map { |bind, result| [bind, to_por(result)] }
      end
    when Array
      if solution.size == 1
        to_por(solution.first)
      else
        solution.map { |sol| to_por(sol) }
      end
    when RDF::Literal
      solution.object
    when RDF::URI
      solution.to_s
    else
      puts "don't recognzize #{solution.class}"
      solution.to_s
    end
  end

  # Generates Turtle for resources/maf_example.maf into a temporary location
  # and loads it into a repository. Both temporary files are removed
  # afterwards, including when generation or loading fails.
  #
  # @return [RDF::Repository] repository loaded from the generated Turtle
  def generate_data
    generator = PubliSci::Readers::MAF.new
    in_file = 'resources/maf_example.maf'
    f = Tempfile.new('graph')
    f.close
    ttl_path = f.path + '.ttl'
    begin
      generator.generate_n3(in_file, {output: :file, output_base: f.path})
      RDF::Repository.load(ttl_path)
    ensure
      File.delete(ttl_path) if File.exist?(ttl_path)
      f.unlink
    end
  end

  # Runs resources/queries/patient.rq for one patient and returns the
  # :barcodes binding of the first solution.
  def select_patient_count(repo, patient_id = "A8-A08G")
    qry = IO.read('resources/queries/patient.rq').gsub('%{patient}', patient_id)
    SPARQL.execute(qry, repo).first[:barcodes]
  end

  # Runs resources/queries/patient_list.rq and returns the raw solutions.
  def patients(repo)
    SPARQL.execute(IO.read('resources/queries/patient_list.rq'), repo)
  end

  # Runs resources/queries/gene.rq for one patient and returns the raw solutions.
  def select_patient_genes(repo, patient_id = "A8-A08G")
    qry = IO.read('resources/queries/gene.rq').gsub('%{patient}', patient_id)
    SPARQL.execute(qry, repo)
  end

  # Selects one or more properties of qb:Observation resources, optionally
  # restricted by other property values.
  #
  # @param repo [RDF::Repository] repository to query
  # @param property [String, Array<String>] property name(s); each is also
  #   used as the name of the SPARQL variable it is bound to
  # @param restrictions [Hash] property name (or RESTRICTIONS key) => required value
  # @return the single solution when exactly one row matches, otherwise the
  #   full (possibly empty) result set
  def select_property(repo, property = ["Hugo_Symbol"], restrictions = {})
    selects = Array(property)

    targets = selects.map { |name| "\n #{property_uri(name)} ?#{name} ;" }.join
    str = restrictions.map { |restrict, value|
      "\n #{property_uri(restrict)} #{sparql_term(value)} ;"
    }.join

    qry = <<-EOF
      PREFIX qb: <http://purl.org/linked-data/cube#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX sio: <http://semanticscience.org/resource/>

      SELECT DISTINCT ?#{selects.join(" ?")} WHERE {
      ?obs a qb:Observation;
      #{str}
      #{targets}
      .
      }
    EOF

    results = SPARQL.execute(qry, repo)
    results.size == 1 ? results.first : results
  end

  # Collects the values attached to a node.
  #
  # For each (predicate, object) pair of `uri`: an SIO_000300 object is taken
  # directly; an SIO_000008 object is followed one level and its SIO_000300
  # object is taken; rdf:type pairs are dropped; every other object is kept.
  #
  # @param repo [RDF::Repository] repository to query
  # @param uri [#to_s] subject whose values are wanted
  # @return [Array] the collected objects
  def node_value(repo, uri)
    value_predicate = "http://semanticscience.org/resource/SIO_000300"
    rows = SPARQL.execute("SELECT DISTINCT ?p ?o where { <#{uri.to_s}> ?p ?o}", repo)

    rows.map { |row|
      case row[:p].to_s
      when value_predicate
        row[:o]
      when "http://semanticscience.org/resource/SIO_000008"
        nested = SPARQL.execute("SELECT DISTINCT ?p ?o where { <#{row[:o].to_s}> ?p ?o}", repo)
        nested.select { |inner| inner[:p].to_s == value_predicate }.first[:o]
      when "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        nil
      else
        row[:o]
      end
    }.compact
  end

  # Resolves a symbol or synonym to the approved HGNC symbol by querying the
  # remote endpoint http://cu.hgnc.bio2rdf.org/sparql.
  #
  # @param hugo_symbol [String] symbol or synonym to look up
  # @return [String] the first approved symbol found ("" when there is none)
  def official_symbol(hugo_symbol)
    qry = <<-EOF

      SELECT distinct ?official where {
      {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
      UNION
      {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}

      ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
      }

    EOF

    sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
    sparql.query(qry).map(&:official).first.to_s
  end

  # Looks up a gene's length as `end - start` from the Ensembl REST lookup
  # service, after mapping the symbol to an Ensembl id through HGNC.
  #
  # @param hugo_symbol [String] symbol, or a URI whose last path segment is the symbol
  # @return [Integer] difference between the reported end and start
  # @raise [RuntimeError] when no Ensembl entry exists or the service does not answer 200
  def gene_length(hugo_symbol)
    hugo_symbol = official_symbol(hugo_symbol.split('/').last)
    qry = IO.read('resources/queries/hugo_to_ensembl.rq').gsub('%{hugo_symbol}', hugo_symbol)
    sol = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql").query(qry)
    raise "No Ensembl entry found for #{hugo_symbol}" if sol.size == 0

    # Keep only the part after the last ':' of the first ensembl binding.
    ensemble_id = sol.map(&:ensembl).first.to_s.split(':').last

    url = URI.parse('http://beta.rest.ensembl.org/')
    http = Net::HTTP.new(url.host, url.port)
    request = Net::HTTP::Get.new('/lookup/id/' + ensemble_id + '?format=full', {'Content-Type' => 'application/json'})
    response = http.request(request)
    raise "Invalid response: #{response.code}" if response.code != "200"

    js = JSON.parse(response.body)
    js['end'] - js['start']
  end

  # Placeholder with no implementation; returns nil.
  def derive_gene_lengths

  end

  # Builds a report for one patient: the id, the number of Hugo_Symbol values
  # found, and each symbol with its gene length (remote lookups).
  #
  # @return [Hash] {patient_id:, mutation_count:, mutations: [{symbol:, length:}]}
  def patient_info(id, repo)
    symbols = Array(to_por(select_property(repo, "Hugo_Symbol", patient: id)))
    patient = {patient_id: id, mutation_count: symbols.size, mutations: []}

    symbols.each { |sym| patient[:mutations] << {symbol: sym, length: gene_length(sym)} }
    patient
  end

  # Builds a report for one gene from resources/queries/patients_with_mutation.rq:
  # the number of solutions, the gene length (remote lookup) and the patient ids.
  #
  # @return [Hash] {mutations:, gene_length:, patients:}
  def gene_info(hugo_symbol, repo)
    qry = IO.read('resources/queries/patients_with_mutation.rq').gsub('%{hugo_symbol}', hugo_symbol)
    sols = SPARQL.execute(qry, repo)
    {
      mutations: sols.size,
      gene_length: gene_length(hugo_symbol),
      patients: sols.map(&:patient_id).map(&:to_s)
    }
  end

  private

  # Full property URI for a name: the RESTRICTIONS entry when one exists,
  # otherwise the name appended to the onto.strinz.me properties namespace.
  def property_uri(name)
    RESTRICTIONS[name.to_sym] || "<http://onto.strinz.me/properties/#{name}>"
  end

  # Renders a restriction value as it appears in the query. Non-strings pass
  # through; strings that are not valid resources are double-quoted; valid
  # resources containing "http://" are written in their base form; other
  # valid resources are left as given.
  def sparql_term(value)
    return value unless value.is_a?(String)

    resource = RDF::Resource(value)
    return '"' + value + '"' unless resource.valid?

    value[/http:\/\//] ? resource.to_base : value
  end
end
221
+
222
+
223
+
224
# Specs for MafQuery against a repository generated once from the example MAF
# file. The "remote service calls" context additionally needs network access.
describe MafQuery do
  before(:all) do
    @maf = MafQuery.new
    @repo = @maf.generate_data
  end

  describe "query genes" do
    it { @maf.select_patient_genes(@repo, "BH-A0HP").size.should > 0 }
  end

  describe "query number of entries" do
    it { @maf.select_patient_count(@repo, "BH-A0HP").should > 0 }
  end

  describe ".patients" do
    it "retrieves a list of patients" do
      @maf.to_por(@maf.patients(@repo)).first.should == "E9-A22B"
    end
  end

  describe ".select_property" do
    it { @maf.to_por(@maf.select_property(@repo, "Hugo_Symbol", patient: "BH-A0HP")).should == "http://identifiers.org/hgnc.symbol/A1CF" }
    it { @maf.select_property(@repo, "Entrez_Gene_Id", patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
    it { @maf.select_property(@repo, "Center", patient: "BH-A0HP")[:Center].to_s.should == "genome.wustl.edu" }
    it { @maf.select_property(@repo, "NCBI_Build", patient: "BH-A0HP")[:NCBI_Build].to_i.should == 37 }

    context "extra parsed properties" do
      it { @maf.select_property(@repo, "sample_id", patient: "BH-A0HP")[:sample_id].should == "01A-12D-A099-09" }
      it { @maf.select_property(@repo, "patient_id", patient: "BH-A0HP")[:patient_id].should == "BH-A0HP" }
    end

    context "multiple restrictions" do
      it { @maf.select_property(@repo, "Entrez_Gene_Id", patient: "BH-A0HP", :Chromosome => 10)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { @maf.select_property(@repo, "Entrez_Gene_Id", patient: "BH-A0HP", :Chromosome => 2).should == [] }
    end

    context "multiple selections" do
      it { @maf.select_property(@repo, ['Hugo_Symbol', 'Entrez_Gene_Id'], patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { @maf.select_property(@repo, ['Hugo_Symbol', 'Entrez_Gene_Id'], patient: "BH-A0HP")[:Hugo_Symbol].to_s.should == 'http://identifiers.org/hgnc.symbol/A1CF' }
    end

    context "non-existent properties" do
      it { @maf.select_property(@repo, "Chunkiness", patient: "BH-A0HP").should == [] }
    end
  end

  context "remote service calls", no_travis: true do
    describe ".gene_length" do
      it { @maf.gene_length('A2BP1').should == 1694245 }
    end

    # Disabled in the original:
    # describe ".official_symbol" do
    #   it { @maf.official_symbol('A2BP1').should == 'RBFOX1' }
    # end

    # The descriptions of the next two examples were swapped in the original;
    # each now describes the report its body actually checks.
    describe ".gene_info" do
      it 'collects the number of patients with a mutation in a gene and its length' do
        gene = @maf.gene_info('A1BG', @repo)
        gene[:mutations].should == 2
        gene[:gene_length].should == 8321
        gene[:patients].first.should == "E9-A22B"
      end
    end

    describe ".patient_info" do
      it 'collects the number of mutations and gene lengths for each mutation' do
        patient = @maf.patient_info('BH-A0HP', @repo)
        patient[:mutation_count].should == 1
        patient[:mutations].first[:length].should == 79113
        patient[:mutations].first[:symbol].should == 'http://identifiers.org/hgnc.symbol/A1CF'
      end
    end
  end
end
300
+
301
# Small scripting facade over MafQuery, suitable for instance_eval'd query
# scripts: it holds a MafQuery and a repository and converts results to
# plain Ruby values with MafQuery#to_por.
class QueryScript
  # @param repo [Object, nil] repository to query; when nil (or false) a fresh
  #   one is built with MafQuery#generate_data
  def initialize(repo = nil)
    @__maf = MafQuery.new
    @__repo = repo || @__maf.generate_data
  end

  # Runs MafQuery#select_<operation> when such a method exists; otherwise
  # treats `operation` as a property name for MafQuery#select_property.
  #
  # @param operation [String, Symbol] selector suffix or property name
  # @param args extra arguments forwarded after the repository
  # @return the result converted with MafQuery#to_por
  def select(operation, *args)
    selector = :"select_#{operation}"
    result =
      if @__maf.respond_to?(selector)
        @__maf.send(selector, @__repo, *args)
      else
        @__maf.select_property(@__repo, operation, *args)
      end
    @__maf.to_por(result)
  end

  # Gene length for `gene`, converted with MafQuery#to_por.
  def gene_length(gene)
    @__maf.to_por(@__maf.gene_length(gene))
  end

  # Dispatches to MafQuery#<type>_info(id, repo); an unknown type raises
  # NoMethodError from the dispatch.
  def report_for(type, id)
    @__maf.send(:"#{type}_info", id, @__repo)
  end
end
327
+
328
# Specs for QueryScript#select, called directly and through instance_eval'd
# script strings.
describe QueryScript do
  describe ".select" do
    # Built once; with no repository given, QueryScript generates its own.
    before(:all) do
      @ev = QueryScript.new
    end

    it { @ev.select('patient_count', "BH-A0HP").should > 0 }

    context "with instance_eval" do
      it { @ev.instance_eval("select 'patient_count', patient: 'BH-A0HP'").should > 0 }
      it { @ev.instance_eval("select 'Hugo_Symbol', patient: 'BH-A0HP'").should == 'http://identifiers.org/hgnc.symbol/A1CF' }
      # Integer rather than Fixnum: the Fixnum constant no longer exists on
      # Ruby 3.2+, and Integer also matches on older Rubies.
      it { @ev.instance_eval("select 'Chromosome', patient: 'BH-A0HP'").is_a?(Integer).should be true }
      it { @ev.instance_eval("report_for 'patient', 'BH-A0HP'").is_a?(Hash).should be true }
    end
  end
end