publisci 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
data/spec/dsl_spec.rb ADDED
@@ -0,0 +1,134 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
# Specs for the top-level PubliSci DSL: the `data`, `metadata` and
# `provenance` blocks, generator options, and the in-memory / 4store
# repository outputs.
describe PubliSci::DSL do
  include PubliSci::DSL

  # Empty the Prov, Metadata and Dataset registries before every example.
  before(:each) do
    PubliSci::Prov.registry.clear
    PubliSci::Metadata.registry.clear
    PubliSci::Dataset.registry.clear
  end

  context "maf files" do
    describe "set options" do
      before { PubliSci::Dataset.register_reader('.maf', PubliSci::Readers::MAF) }

      it "can change output type" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        generate_n3[/a qb:Observation/].should_not be nil
      end

      it "can output to repository" do
        data do
          object 'resources/maf_example.maf'
          option :output, :print
        end

        repo = to_repository
        repo.is_a?(RDF::Repository).should be true
        repo.size.should > 0

        qry = <<-EOF
          SELECT ?observation where {
            ?observation a <http://purl.org/linked-data/cube#Observation>;
              <http://onto.strinz.me/properties/Hugo_Symbol> ?node.

          }

        EOF

        SPARQL::Client.new(repo).query(qry).size.should > 0
      end
    end
  end

  it "can generate dataset, metadata, and provenance when given a script" do
    dat = data do
      object 'spec/csv/bacon.csv'
    end
    met = metadata do
      name "Will"
    end
    prv = provenance do
      entity :a_thing
    end

    met.should_not be nil
    prv.should_not be nil
    dat.should_not be nil

    generate_n3.size.should > 0
  end

  # This example previously shared its description with the one above, which
  # made the two indistinguishable in spec output. Here the script is a string
  # evaluated on a DSL instance, and the CSV is referenced by URL.
  it "can generate a dataset from a remote file when given a script string" do
    ev = PubliSci::DSL::Instance.new
    dat = ev.instance_eval <<-EOF
      data do
        object 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
      end
    EOF
    dat.should_not be nil
    ev.generate_n3.size.should > 0
  end

  it "can set generator options" do
    data do
      object 'spec/csv/bacon.csv'
      option :no_labels, true
    end

    generate_n3[/rdfs:label "\d"/].should be nil
  end

  it "can output to in-memory repository" do
    data do
      object 'spec/csv/bacon.csv'
    end

    repo = to_repository
    repo.is_a?(RDF::Repository).should be true
    repo.size.should > 0
  end

  it "can output to 4store repository", no_travis: true do
    configure do |cfg|
      cfg.repository = :fourstore
    end

    data do
      object 'spec/csv/bacon.csv'
    end

    # Record the store's size before writing so growth can be asserted.
    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end

  it "can output provenance to 4store", no_travis: true do
    ev = PubliSci::Prov::DSL::Instance.new
    str = IO.read('examples/primer-full.prov')
    ev.instance_eval(str, 'examples/primer-full.prov')
    ev.instance_eval <<-EOF
      configure do |cfg|
        cfg.repository = :fourstore
      end
    EOF

    # Record the store's size before writing so growth can be asserted.
    old_size = RDF::FourStore::Repository.new('http://localhost:8080/').size
    repo = ev.to_repository
    repo.is_a?(RDF::FourStore::Repository).should be true
    repo.size.should > old_size
  end
end
@@ -0,0 +1,44 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/csv.rb'
3
+ require_relative '../../lib/bio-publisci.rb'
4
+
5
+ # require 'rdf/turtle'
6
+ require 'tempfile'
7
+
8
# Specs for the CSV reader: output is compared with a stored Turtle reference
# and the default dimension/measure selection is checked through graph queries.
describe PubliSci::Readers::CSV do

  # Loads a Turtle string into an RDF::Graph by way of a temporary file.
  def create_graph(turtle_string)
    tmp = Tempfile.new('graph')
    tmp.write(turtle_string)
    tmp.close
    RDF::Graph.load(tmp.path, :format => :ttl).tap { tmp.unlink }
  end

  before(:each) do
    @generator = PubliSci::Readers::CSV.new
  end

  context 'with reference CSV' do
    it "should generate correct output for reference file" do
      spec_dir = File.dirname(__FILE__)
      options = {dimensions: ["producer", "pricerange"], label_column: 0}

      turtle_string = @generator.generate_n3(spec_dir + '/../csv/bacon.csv', 'bacon', options)

      turtle_string.should == IO.read(spec_dir + '/../turtle/bacon')
    end
  end

  it "selects first column as a coded dimension and creates measures from the rest by default" do
    csv_path = File.dirname(__FILE__) + '/../csv/bacon.csv'
    graph = create_graph(@generator.generate_n3(csv_path, 'bacon'))
    qb = RDF::Vocabulary.new("http://purl.org/linked-data/cube#")

    dims = RDF::Query.execute(graph) { pattern [:dataset, qb.dimension, :dimension] }
    dims.size.should == 1
    dims.first[:dimension].to_s.should == "http://onto.strinz.me/properties/producer"

    measures = RDF::Query.execute(graph) { pattern [:dataset, qb.measure, :measure] }
    measure_names = measures.map { |solution| solution[:measure].to_s.split('/').last }
    measure_names.should == ["pricerange", "chunkiness", "deliciousness"]
  end

end
@@ -0,0 +1,44 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
# Specs for the R dataframe reader. The examples evaluate R code over an
# Rserve connection and are tagged `no_travis`.
describe PubliSci::Readers::Dataframe do

  # Loads a Turtle string into an RDF::Graph by way of a temporary file.
  # NOTE(review): not called by any example in this file.
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    f.write(turtle_string)
    f.close
    graph = RDF::Graph.load(f.path, :format => :ttl)
    f.unlink
    graph
  end

  context "with r/qtl dataframe", no_travis: true do
    # Evaluate the R script once and keep the resulting expression and its
    # Turtle rendering for the examples below.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::Dataframe.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)
        mr = scanone(listeria,method="mr")
      EOF
      @rexp = @r.eval 'mr'
      @turtle = @generator.generate_n3(@rexp, 'mr')
    end

    it "generates rdf from R dataframe" do
      turtle = @generator.generate_n3(@rexp, 'mr')
      turtle.is_a?(String).should be true
    end

    it "creates correct graph according to reference file" do
      reference = IO.read(File.dirname(__FILE__) + '/../turtle/reference')
      @turtle.should eq reference
    end

    it "can optionally specify a row label" do
      # Use a local instead of overwriting the shared @turtle, and assert on
      # the result so the example cannot pass vacuously.
      labeled = @generator.generate_n3(@rexp, 'mr', {row_label: "markers"})
      labeled.is_a?(String).should be true
    end
  end

end
@@ -0,0 +1,40 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/csv.rb'
3
+ require_relative '../../lib/bio-publisci.rb'
4
+
5
+ # require 'rdf/turtle'
6
+ require 'tempfile'
7
+
8
# Specs for the MAF reader: both the printed and the file output must contain
# the basic Data Cube components.
describe PubliSci::Readers::MAF do
  before(:each) do
    @generator = PubliSci::Readers::MAF.new
    @in_file = 'resources/maf_example.maf'
  end

  describe ".generate_n3" do
    # Expects the Turtle string to declare an observation, a dimension
    # property, a measure property and a component specification.
    def is_cube(str)
      str[/a qb:Observation/].should_not be nil
      str[/a rdf:Property, qb:DimensionProperty/].should_not be nil
      str[/a rdf:Property, qb:MeasureProperty/].should_not be nil
      str[/a qb:ComponentSpecification/].should_not be nil
    end

    context "print output" do
      before { @str = @generator.generate_n3(@in_file, {output: :print}) }
      it { is_cube(@str) }
    end

    context "file output" do
      before do
        base = Tempfile.new('graph')
        base.close
        ttl_path = base.path + '.ttl'
        begin
          @generator.generate_n3(@in_file, {output: :file, output_base: base.path})
          @str = IO.read(ttl_path)
          # The generated Turtle is also written to resources/maf_rdf.ttl.
          File.open('resources/maf_rdf.ttl', 'w') { |out| out.write @str }
        ensure
          # Remove the generated file as well as the Tempfile placeholder.
          File.delete(ttl_path) if File.exist?(ttl_path)
          base.unlink
        end
      end

      it { is_cube(@str) }
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
+ require 'tempfile'
4
+
5
# Specs for the R/qtl cross reader. The examples evaluate R code over an
# Rserve connection and are tagged `no_travis`.
describe PubliSci::Readers::RCross do

  # Loads a Turtle string into an RDF::Graph by way of a temporary file.
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    f.write(turtle_string)
    f.close
    graph = RDF::Graph.load(f.path, :format => :ttl)
    f.unlink
    graph
  end

  context "with reduced listeria cross", no_travis: true do
    # Builds `liscopy` in the R session: the listeria cross with genotype data
    # and phenotypes cut down to the first two rows.
    before(:all) do
      @r = Rserve::Connection.new
      @generator = PubliSci::Readers::RCross.new
      @r.eval <<-EOF
        library(qtl)
        data(listeria)

        liscopy = listeria

        for(i in 1:20)
          liscopy$geno[[i]]$data <- as.matrix(liscopy$geno[[i]]$data[1:2,])

        liscopy$pheno <- liscopy$phen[1:2,]
      EOF
    end

    it "generates output to file by default", no_travis: true do
      f = Tempfile.new('cross')
      begin
        @generator.generate_n3(@r, 'liscopy', f.path, {quiet: true})
        turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_1.ttl")
        graph = create_graph(turtle_string)
        graph.size.should > 0
      ensure
        # Remove whatever the reader wrote next to the temp path, then the
        # Tempfile itself.
        Dir.glob("#{f.path}_*.ttl").each { |path| File.delete(path) }
        f.close
        f.unlink
      end
    end

    # Pending: declared without a block until string output is implemented.
    it "can generate string output", no_travis: true #do
    # pending
    # f=Tempfile.new('cross')
    # turtle_string = @generator.generate_n3(@connection,'liscopy',f.path,{quiet: false, output: :string})

    # graph = create_graph(turtle_string)
    # graph.size.should > 0
    # end
  end

end
@@ -0,0 +1,44 @@
1
+ # require_relative '../../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../../lib/r2rdf/generators/r_matrix.rb'
3
+ # require 'rdf/turtle'
4
+ # require 'rserve'
5
+ require_relative '../../lib/bio-publisci.rb'
6
+
7
+ require 'tempfile'
8
+
9
# Specs for the R matrix reader. The examples evaluate R code over an Rserve
# connection and are tagged `no_travis`.
describe PubliSci::Readers::RMatrix do

  # Loads a Turtle string into an RDF::Graph by way of a temporary file.
  def create_graph(turtle_string)
    f = Tempfile.new('graph')
    f.write(turtle_string)
    f.close
    graph = RDF::Graph.load(f.path, :format => :ttl)
    f.unlink
    graph
  end

  # Runs the block with a fresh Tempfile, then removes it together with any
  # "<path>_*.ttl" files written next to it.
  def with_output_base
    f = Tempfile.new('matrix')
    yield f
  ensure
    if f
      Dir.glob("#{f.path}_*.ttl").each { |path| File.delete(path) }
      f.close
      f.unlink
    end
  end

  before(:each) do
    @generator = PubliSci::Readers::RMatrix.new
    @connection = Rserve::Connection.new
  end

  it "generates a simple output automatically", no_travis: true do
    with_output_base do |f|
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      @generator.generate_n3(@connection, 'mat', f.path, {quiet: true})

      turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_0.ttl")
      graph = create_graph(turtle_string)
      graph.size.should > 0
    end
  end

  it "can generate string output", no_travis: true do
    with_output_base do |f|
      @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
      turtle_string = @generator.generate_n3(@connection, 'mat', f.path, {quiet: true, output: :string})

      graph = create_graph(turtle_string)
      graph.size.should > 0
    end
  end

end
File without changes
@@ -0,0 +1,343 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ #sparql = SPARQL::Client.new("#{repo.uri}/sparql/").query(qry)
4
+
5
# Query helper used by the specs in this file. It converts the example MAF
# file to RDF and wraps the SPARQL lookups (plus two remote service calls)
# that the examples exercise.
class MafQuery
  # Short restriction/property names and the predicate each one stands for.
  RESTRICTIONS = {
    patient: '<http://onto.strinz.me/properties/patient_id>',
    sample: '<http://onto.strinz.me/properties/sample_id>',
    gene: '<http://onto.strinz.me/properties/Hugo_Symbol>',
  }.freeze

  # Namespace used for any property name not listed in RESTRICTIONS.
  PROPERTY_BASE = 'http://onto.strinz.me/properties/'.freeze

  # SPARQL endpoint queried by #official_symbol and #gene_length.
  HGNC_ENDPOINT = 'http://cu.hgnc.bio2rdf.org/sparql'.freeze

  # Predicates that #node_value treats specially.
  SIO_000300 = 'http://semanticscience.org/resource/SIO_000300'.freeze
  SIO_000008 = 'http://semanticscience.org/resource/SIO_000008'.freeze
  RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'.freeze

  # Converts a query result into plain Ruby values.
  #
  # * Integer, String and Symbol are returned unchanged.
  # * RDF::Query::Solutions become an array with one entry per solution: the
  #   converted value when the solution has a single binding, otherwise a Hash
  #   of binding name => converted value.
  # * A single RDF::Query::Solution becomes its converted value (one binding)
  #   or an array of [name, value] pairs (several bindings).
  # * A one-element Array collapses to its converted element; other arrays are
  #   converted element-wise.
  # * RDF::Literal yields its #object, RDF::URI its string form.
  # * Anything else is reported on stdout and returned via #to_s.
  def to_por(solution)
    case solution
    when Integer, String, Symbol
      solution
    when RDF::Query::Solutions
      solution.map do |sol|
        if sol.bindings.size == 1
          to_por(sol.bindings.first.last)
        else
          # Hash[] builds a hash from pairs; Kernel#Hash would raise TypeError
          # for a non-empty array. The bindings come from this solution, not
          # from the enclosing collection.
          Hash[sol.bindings.map { |bind, result| [bind, to_por(result)] }]
        end
      end
    when RDF::Query::Solution
      if solution.bindings.size == 1
        to_por(solution.bindings.first.last)
      else
        solution.bindings.map { |bind, result| [bind, to_por(result)] }
      end
    when Array
      if solution.size == 1
        to_por(solution.first)
      else
        solution.map { |sol| to_por(sol) }
      end
    when RDF::Literal
      solution.object
    when RDF::URI
      solution.to_s
    else
      puts "don't recognize #{solution.class}"
      solution.to_s
    end
  end

  # Converts resources/maf_example.maf to Turtle in a temporary location and
  # loads the result into an RDF::Repository, which is returned. The temporary
  # files are removed even when generation or loading fails.
  def generate_data
    generator = PubliSci::Readers::MAF.new
    in_file = 'resources/maf_example.maf'
    base = Tempfile.new('graph')
    base.close
    ttl_path = base.path + '.ttl'
    begin
      generator.generate_n3(in_file, {output: :file, output_base: base.path})
      RDF::Repository.load(ttl_path)
    ensure
      File.delete(ttl_path) if File.exist?(ttl_path)
      base.unlink
    end
  end

  # Runs resources/queries/patient.rq for +patient_id+ and returns the
  # :barcodes binding of the first solution.
  def select_patient_count(repo, patient_id="A8-A08G")
    qry = IO.read('resources/queries/patient.rq').gsub('%{patient}', patient_id)
    SPARQL.execute(qry, repo).first[:barcodes]
  end

  # Runs resources/queries/patient_list.rq and returns the raw solutions.
  def patients(repo)
    SPARQL.execute(IO.read('resources/queries/patient_list.rq'), repo)
  end

  # Runs resources/queries/gene.rq for +patient_id+ and returns the raw solutions.
  def select_patient_genes(repo, patient_id="A8-A08G")
    qry = IO.read('resources/queries/gene.rq').gsub('%{patient}', patient_id)
    SPARQL.execute(qry, repo)
  end

  # Selects one or more properties of qb:Observation resources.
  #
  # repo         - repository to query.
  # property     - a property name or array of names; each is also used as the
  #                SPARQL variable name in the result.
  # restrictions - hash of property name => required value. Names listed in
  #                RESTRICTIONS are expanded to their predicates.
  #
  # Returns the single solution when exactly one matches, otherwise the whole
  # result collection (which may be empty).
  def select_property(repo, property=["Hugo_Symbol"], restrictions={})
    selects = Array(property)

    targets = selects.map { |name| "\n  #{predicate_for(name)} ?#{name} ;" }.join
    filters = restrictions.map { |name, value|
      "\n  #{predicate_for(name)} #{sparql_term(value)} ;"
    }.join

    qry = <<-EOF
      PREFIX qb: <http://purl.org/linked-data/cube#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX sio: <http://semanticscience.org/resource/>

      SELECT DISTINCT ?#{selects.join(" ?")} WHERE {
        ?obs a qb:Observation;
        #{filters}
        #{targets}
        .
      }
    EOF

    results = SPARQL.execute(qry, repo)
    results.size == 1 ? results.first : results
  end

  # Collects values attached to the node +uri+. For each predicate/object pair:
  # a SIO_000300 object is taken directly; a SIO_000008 object is followed one
  # step and that node's SIO_000300 object is taken; rdf:type statements are
  # skipped; every other object is taken as is.
  def node_value(repo, uri)
    describe_node(repo, uri).map { |sol|
      case sol[:p].to_s
      when SIO_000300
        sol[:o]
      when SIO_000008
        describe_node(repo, sol[:o]).find { |inner| inner[:p].to_s == SIO_000300 }[:o]
      when RDF_TYPE
        nil
      else
        sol[:o]
      end
    }.compact
  end

  # Asks the remote HGNC endpoint for the approved symbol of +hugo_symbol+,
  # matching it as either an approved symbol or a synonym. Returns a String,
  # which is empty when the query has no solutions.
  def official_symbol(hugo_symbol)
    qry = <<-EOF

      SELECT distinct ?official where {
        {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
        UNION
        {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}

        ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
      }

    EOF

    SPARQL::Client.new(HGNC_ENDPOINT).query(qry).map(&:official).first.to_s
  end

  # Looks up the Ensembl id for a gene symbol (or a URI ending in the symbol)
  # and returns `end - start` from the Ensembl REST lookup response.
  #
  # Raises RuntimeError when no Ensembl entry is found or the REST call does
  # not answer with status 200.
  def gene_length(hugo_symbol)
    hugo_symbol = official_symbol(hugo_symbol.split('/').last)
    qry = IO.read('resources/queries/hugo_to_ensembl.rq').gsub('%{hugo_symbol}', hugo_symbol)
    sol = SPARQL::Client.new(HGNC_ENDPOINT).query(qry)
    raise "No Ensembl entry found for #{hugo_symbol}" if sol.size == 0

    ensembl_id = sol.map(&:ensembl).first.to_s.split(':').last
    raise "No Ensembl entry found for #{hugo_symbol}" if ensembl_id.nil?

    url = URI.parse('http://beta.rest.ensembl.org/')
    http = Net::HTTP.new(url.host, url.port)
    request = Net::HTTP::Get.new('/lookup/id/' + ensembl_id + '?format=full', {'Content-Type' => 'application/json'})
    response = http.request(request)
    raise "Invalid response: #{response.code}" if response.code != "200"

    js = JSON.parse(response.body)
    js['end'] - js['start']
  end

  # Placeholder; intentionally empty.
  def derive_gene_lengths

  end

  # Builds a report hash for one patient: the id, the number of Hugo symbols
  # selected for that patient, and a {symbol:, length:} entry per symbol.
  # Calls #gene_length (remote) once per symbol.
  def patient_info(id, repo)
    symbols = Array(to_por(select_property(repo, "Hugo_Symbol", patient: id)))
    mutations = symbols.map { |sym| {symbol: sym, length: gene_length(sym)} }
    {patient_id: id, mutation_count: symbols.size, mutations: mutations}
  end

  # Builds a report hash for one gene from
  # resources/queries/patients_with_mutation.rq: the number of solutions, the
  # gene length (remote call) and the patient ids as strings.
  def gene_info(hugo_symbol, repo)
    qry = IO.read('resources/queries/patients_with_mutation.rq').gsub('%{hugo_symbol}', hugo_symbol)
    sols = SPARQL.execute(qry, repo)
    {mutations: sols.size, gene_length: gene_length(hugo_symbol), patients: sols.map(&:patient_id).map(&:to_s)}
  end

  private

  # Predicate for a property name: the RESTRICTIONS entry if there is one,
  # otherwise the name appended to PROPERTY_BASE.
  def predicate_for(name)
    RESTRICTIONS[name.to_sym] || "<#{PROPERTY_BASE}#{name}>"
  end

  # Renders a restriction value as a SPARQL term. Non-strings are interpolated
  # as they are. Strings that form a valid resource are turned into an IRI term
  # when they contain "http://" and are otherwise left untouched; all other
  # strings become quoted literals with backslashes and double quotes escaped.
  def sparql_term(value)
    return value unless value.is_a? String

    if RDF::Resource(value).valid?
      value[/http:\/\//] ? RDF::Resource(value).to_base : value
    else
      '"' + value.gsub(/["\\]/) { |char| "\\#{char}" } + '"'
    end
  end

  # All distinct predicate/object pairs whose subject is +uri+.
  def describe_node(repo, uri)
    SPARQL.execute("SELECT DISTINCT ?p ?o where { <#{uri.to_s}> ?p ?o}", repo)
  end
end
221
+
222
+
223
+
224
# Specs for MafQuery, run against a repository generated once from the
# example MAF file.
describe MafQuery do
  before(:all) do
    @maf = MafQuery.new
    @repo = @maf.generate_data
  end

  # Shorthand for select_property on the shared repository.
  def lookup(property, restrictions)
    @maf.select_property(@repo, property, restrictions)
  end

  describe "query genes" do
    it { @maf.select_patient_genes(@repo, "BH-A0HP").size.should > 0 }
  end

  describe "query number of entries" do
    it { @maf.select_patient_count(@repo, "BH-A0HP").should > 0 }
  end

  describe ".patients" do
    it "retrieves a list of patients" do
      patient_ids = @maf.to_por(@maf.patients(@repo))
      patient_ids.first.should == "E9-A22B"
    end
  end

  describe ".select_property" do
    it { @maf.to_por(lookup("Hugo_Symbol", patient: "BH-A0HP")).should == "http://identifiers.org/hgnc.symbol/A1CF" }
    it { lookup("Entrez_Gene_Id", patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
    it { lookup("Center", patient: "BH-A0HP")[:Center].to_s.should == "genome.wustl.edu" }
    it { lookup("NCBI_Build", patient: "BH-A0HP")[:NCBI_Build].to_i.should == 37 }

    context "extra parsed properties" do
      it { lookup("sample_id", patient: "BH-A0HP")[:sample_id].should == "01A-12D-A099-09" }
      it { lookup("patient_id", patient: "BH-A0HP")[:patient_id].should == "BH-A0HP" }
    end

    context "multiple restrictions" do
      it { lookup("Entrez_Gene_Id", patient: "BH-A0HP", :Chromosome => 10)[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { lookup("Entrez_Gene_Id", patient: "BH-A0HP", :Chromosome => 2).should == [] }
    end

    context "multiple selections" do
      it { lookup(['Hugo_Symbol', 'Entrez_Gene_Id'], patient: "BH-A0HP")[:Entrez_Gene_Id].to_s.should == 'http://identifiers.org/ncbigene/29974' }
      it { lookup(['Hugo_Symbol', 'Entrez_Gene_Id'], patient: "BH-A0HP")[:Hugo_Symbol].to_s.should == 'http://identifiers.org/hgnc.symbol/A1CF' }
    end

    context "non-existant properties" do
      it { lookup("Chunkiness", patient: "BH-A0HP").should == [] }
    end
  end

  # These examples call external services and are excluded on Travis.
  context "remote service calls", no_travis: true do
    describe ".gene_length" do
      it { @maf.gene_length('A2BP1').should == 1694245 }
    end

    # describe ".official_symbol" do
    #   it { @maf.official_symbol('A2BP1').should == 'RBFOX1' }
    # end

    describe ".gene_info" do
      it 'collects the number of mutations and gene lengths for each mutation' do
        report = @maf.gene_info('A1BG', @repo)
        report[:mutations].should == 2
        report[:gene_length].should == 8321
        report[:patients].first.should == "E9-A22B"
      end
    end

    describe ".patient_info" do
      it 'collects the number of patients with a mutation in a gene and its length' do
        report = @maf.patient_info('BH-A0HP', @repo)
        first_mutation = report[:mutations].first
        report[:mutation_count].should == 1
        first_mutation[:length].should == 79113
        first_mutation[:symbol].should == 'http://identifiers.org/hgnc.symbol/A1CF'
      end
    end
  end
end
300
+
301
# Small facade over MafQuery intended for use with instance_eval'd scripts:
# it holds one MafQuery and one repository and exposes `select`,
# `gene_length` and `report_for`.
class QueryScript
  # repo - repository to query; when omitted (or nil) one is generated with
  #        MafQuery#generate_data.
  def initialize(repo=nil)
    @__maf = MafQuery.new
    @__repo = repo || @__maf.generate_data
  end

  # Runs MafQuery#select_<operation> when such a public method exists,
  # otherwise treats +operation+ as a property name for
  # MafQuery#select_property. Extra arguments are passed through, and the
  # result is converted with MafQuery#to_por.
  def select(operation,*args)
    selector = :"select_#{operation}"
    result =
      if @__maf.respond_to?(selector)
        # public_send keeps script input from reaching non-public methods.
        @__maf.public_send(selector, @__repo, *args)
      else
        @__maf.select_property(@__repo, operation, *args)
      end
    @__maf.to_por(result)
  end

  # Gene length for +gene+, via MafQuery#gene_length.
  def gene_length(gene)
    @__maf.to_por(@__maf.gene_length(gene))
  end

  # Calls MafQuery#<type>_info(id, repo), e.g. type 'patient' or 'gene'.
  # Raises NoMethodError when no such public method exists.
  def report_for(type, id)
    @__maf.public_send(:"#{type}_info", id, @__repo)
  end
end
327
+
328
# Specs for QueryScript#select and #report_for, called directly and through
# instance_eval'd script strings.
describe QueryScript do
  describe ".select" do
    before(:all) {
      @ev = QueryScript.new
    }

    it { @ev.select('patient_count', "BH-A0HP").should > 0 }

    context "with instance_eval" do
      # NOTE(review): this passes a hash where select_patient_count expects the
      # patient id string; confirm it exercises the intended query.
      it { @ev.instance_eval("select 'patient_count', patient: 'BH-A0HP'").should > 0 }
      it { @ev.instance_eval("select 'Hugo_Symbol', patient: 'BH-A0HP'").should == 'http://identifiers.org/hgnc.symbol/A1CF' }
      # Integer rather than Fixnum: Fixnum no longer exists in current Ruby,
      # and on older versions it is a subclass of Integer.
      it { @ev.instance_eval("select 'Chromosome', patient: 'BH-A0HP'").is_a?(Integer).should be true }
      it { @ev.instance_eval("report_for 'patient', 'BH-A0HP'").is_a?(Hash).should be true }
    end
  end
end