publisci 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,33 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::ORM do
4
+
5
+ it "should load and save a turtle file without loss of information in old ORM" do
6
+ pending("pending rewrite of abbreviaton method to account for base_url")
7
+ ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
8
+ cube = PubliSci::DataSet::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
9
+ cube.abbreviate_known(cube.to_n3).should == ref
10
+ # cube.to_n3.should == ref
11
+ end
12
+
13
+ it "should load properties for Observation object" do
14
+ ev = PubliSci::DSL::Instance.new
15
+ r = ev.instance_eval do
16
+ data do
17
+ object 'spec/csv/bacon.csv'
18
+ end
19
+
20
+ to_repository
21
+ end
22
+ Spira.add_repository :default, r
23
+
24
+ PubliSci::ORM::Observation.count.should > 0
25
+
26
+ PubliSci::ORM::Observation.first.load_properties
27
+ fi = PubliSci::ORM::Observation.first
28
+ fi.chunkiness.should_not be nil
29
+ fi.deliciousness.should_not be nil
30
+
31
+ end
32
+
33
+ end
@@ -0,0 +1,72 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+ include PubliSci::Prov::DSL
3
+ # include PubliSci::Prov
4
+
5
+ describe PubliSci::Prov::Model do
6
+ it "can be loaded from" do
7
+ ev = PubliSci::Prov::DSL::Instance.new
8
+ r = ev.instance_eval do
9
+ entity :datathing
10
+
11
+ activity :process, generated: :datathing
12
+
13
+ to_repository
14
+ end
15
+
16
+ Spira.add_repository :default, r
17
+ PubliSci::Prov::Model::Entity.first.should_not be nil
18
+ end
19
+
20
+ context "has useful methods built in to models" do
21
+ it "can reverse chain associated activities for agents" do
22
+ ev = PubliSci::Prov::DSL::Instance.new
23
+
24
+ ag = ev.instance_eval do
25
+ agent :some_dudette
26
+ end
27
+
28
+ act = ev.instance_eval do
29
+ entity :datathing
30
+
31
+ activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
32
+ end
33
+
34
+ r = ev.instance_eval do
35
+ to_repository
36
+ end
37
+
38
+
39
+ # z= ev.instance_eval do
40
+ # generate_n3
41
+ # end
42
+
43
+ Spira.add_repository :default, r
44
+ model_agent = PubliSci::Prov::Model::Agent.first
45
+ ag.subject.should == model_agent.subject
46
+ acts = model_agent.activities
47
+ acts.first.subject.should == act.subject
48
+ end
49
+
50
+ it "can dump all types for Entities" do
51
+ ev = PubliSci::Prov::DSL::Instance.new
52
+
53
+ qb = RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
54
+
55
+ r = ev.instance_eval do
56
+ agent :some_dudette
57
+
58
+ entity :datathing do
59
+ has RDF.type, qb.DataSet
60
+ end
61
+
62
+ activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
63
+
64
+ to_repository
65
+ end
66
+
67
+ Spira.add_repository :default, r
68
+ PubliSci::Prov::Model::Entity.first.all_types.should == %w{http://www.w3.org/ns/prov#Entity http://purl.org/linked-data/cube#DataSet}
69
+ PubliSci::Prov::Model::Entity.first.has_data?.should == true
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,36 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::Analyzer do
4
+ class Ana
5
+ include PubliSci::Analyzer
6
+ end
7
+
8
+ before(:all) do
9
+ @analyzer = Ana.new
10
+
11
+ @measures = ['chunkiness','deliciousness']
12
+ @dimensions = ['producer', 'pricerange']
13
+ @labels = %w(hormel newskies whys)
14
+ @data =
15
+ {
16
+ "producer" => ["hormel","newskies", "whys"],
17
+ "pricerange" => ["low", "medium", "nonexistant"],
18
+ "chunkiness"=> [1, 6, 9001],
19
+ "deliciousness"=> [1, 9, 6]
20
+ }
21
+ end
22
+
23
+ it "should run a basic validation" do
24
+ newdata = []
25
+
26
+ @data.keys.size.times{|i|
27
+ obs = {}
28
+ @data.map{|k,v|
29
+ obs[k] = v[i]
30
+ }
31
+ newdata << obs
32
+ }
33
+
34
+ @analyzer.check_integrity(newdata, @measures, @dimensions)
35
+ end
36
+ end
@@ -0,0 +1,66 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+
4
+ describe "DataCube Node Generation" do
5
+
6
+ context "with Plain Old Ruby objects" do
7
+ #define a temporary class to use module methods
8
+ before(:all) do
9
+ class Gen
10
+ include PubliSci::Dataset::DataCube
11
+ end
12
+
13
+ @generator = Gen.new
14
+ @measures = ['chunkiness','deliciousness']
15
+ @dimensions = ['producer', 'pricerange']
16
+ @codes = @dimensions #all dimensions coded for the tests
17
+ @labels = %w(hormel newskies whys)
18
+ @data =
19
+ {
20
+ "producer" => ["hormel","newskies", "whys"],
21
+ "pricerange" => ["low", "medium", "nonexistant"],
22
+ "chunkiness"=> [1, 6, 9001],
23
+ "deliciousness"=> [1, 9, 6]
24
+ }
25
+ end
26
+
27
+ it "represents nested arrays using blank nodes" do
28
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
29
+ newdata.keys.each{|k| newdata[k] =[[["a", "rdf:Property"],["<http://semanticscience.org/resource/SIO_000300>", newdata[k]]]] }
30
+ observations = @generator.observations(@measures, [], [], newdata, @labels[0], "bacon")
31
+ observations.is_a?(Array).should == true
32
+ # puts observations.first.class
33
+ observations.first.is_a?(String).should == true
34
+ # puts observations
35
+ # observations.first[%r{\[ a rdf:Property ;\n<http://semanticscience.org/resource/SIO_000300> 1 \n \]}].should_not be nil
36
+ end
37
+
38
+ it "can nest arrays to some depth" do
39
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
40
+ newdata.keys.each{|k|
41
+ if ["producer","chunkiness"].include? k
42
+ newdata[k] = [
43
+ [
44
+ ["a", "rdf:MacGuffin"] ,
45
+ [
46
+ "<http://semanticscience.org/resource/SIO_000300>",
47
+ [
48
+ ["a", "rdf:Absurdity"],
49
+ [ 'rdf:value', newdata[k] ]
50
+ ]
51
+ ]
52
+ ]]
53
+
54
+ end
55
+ }
56
+
57
+ observations = @generator.observations(@measures, @dimensions, [], newdata, @labels[0], "bacon")
58
+ observations.is_a?(Array).should == true
59
+ observations.first.is_a?(String).should == true
60
+ # observations.first.count('[').should == 4
61
+ # observations.first.count(']').should == 4
62
+
63
+ # observations.first[%r{\[ a rdf:Property ;\n <http://semanticscience.org/resource/SIO_000300> 1 \]}].should_not be nil
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,4 @@
1
+ producer,pricerange,chunkiness,deliciousness
2
+ hormel,low,1,1
3
+ newskies,medium,6,9
4
+ whys,nonexistant,9001,6
@@ -0,0 +1,11 @@
1
+ producer,pricerange,chunkiness,deliciousness
2
+ hormel,low,1,1
3
+ oscar_mayer,low,6,3
4
+ newskies,medium,2,5
5
+ hormel,low,2,2
6
+ DArtagnan,medium,6,9
7
+ oscar_mayer,low,5,4
8
+ hormel,low,2,1
9
+ farmland,high,6,9
10
+ newskies,medium,6,9
11
+ whys,nonexistant,9001,6
@@ -0,0 +1,169 @@
1
+ # require_relative '../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../lib/r2rdf/generators/dataframe.rb'
3
+ # require_relative '../lib/r2rdf/r_client.rb'
4
+ # require_relative '../lib/r2rdf/r_builder.rb'
5
+ # require_relative '../lib/r2rdf/generators/csv.rb'
6
+
7
+ require_relative '../lib/bio-publisci.rb'
8
+
9
+
10
+ describe PubliSci::Dataset::DataCube do
11
+
12
+ context "with Plain Old Ruby objects" do
13
+ #define a temporary class to use module methods
14
+ before(:all) do
15
+ class Gen
16
+ include PubliSci::Dataset::DataCube
17
+ end
18
+
19
+ @generator = Gen.new
20
+ @measures = ['chunkiness','deliciousness']
21
+ @dimensions = ['producer', 'pricerange']
22
+ @codes = @dimensions #all dimensions coded for the tests
23
+ @labels = %w(hormel newskies whys)
24
+ @data =
25
+ {
26
+ "producer" => ["hormel","newskies", "whys"],
27
+ "pricerange" => ["low", "medium", "nonexistant"],
28
+ "chunkiness"=> [1, 6, 9001],
29
+ "deliciousness"=> [1, 9, 6]
30
+ }
31
+ end
32
+
33
+ it "should have correct output according to the reference file" do
34
+
35
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
36
+ ref = IO.read(File.dirname(__FILE__) + '/turtle/bacon')
37
+ turtle_string.should == ref
38
+ end
39
+
40
+ context "with missing values" do
41
+
42
+ before(:all) do
43
+ @missing_data = Marshal.load(Marshal.dump(@data))
44
+ missingobs = {
45
+ "producer" => "missingbacon",
46
+ "pricerange" => "unknown",
47
+ "chunkiness"=> nil,
48
+ "deliciousness"=> nil,
49
+ }
50
+ missingobs.map{|k,v| @missing_data[k] << v}
51
+ end
52
+
53
+ it "skips observations with missing values by default" do
54
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
55
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
56
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
57
+ end
58
+
59
+ it "includes observations with missing values if flag is set" do
60
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
61
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
62
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
63
+ end
64
+
65
+ end
66
+
67
+ it 'generates prefixes' do
68
+ prefixes = @generator.prefixes('bacon')
69
+ prefixes.is_a?(String).should == true
70
+ end
71
+
72
+ it 'generates data structure definition' do
73
+ dsd = @generator.data_structure_definition(@measures, @dimensions, @codes, "bacon")
74
+ dsd.is_a?(String).should == true
75
+ end
76
+
77
+ it 'generates dataset' do
78
+ dsd = @generator.dataset("bacon")
79
+ dsd.is_a?(String).should == true
80
+ end
81
+
82
+ it 'generates component specifications' do
83
+ components = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
84
+ components.is_a?(Array).should == true
85
+ components.first.is_a?(String).should == true
86
+ end
87
+
88
+ it 'generates dimension properties' do
89
+ dimensions = @generator.dimension_properties(@dimensions,@codes,"bacon")
90
+ dimensions.is_a?(Array).should == true
91
+ dimensions.first.is_a?(String).should == true
92
+ end
93
+
94
+ it 'generates measure properties' do
95
+ measures = @generator.measure_properties(@measures, "bacon")
96
+ measures.is_a?(Array).should == true
97
+ measures.first.is_a?(String).should == true
98
+ end
99
+
100
+ it 'generates observations' do
101
+ #measures, dimensions, codes, var, observation_labels, data, options={}
102
+
103
+ observations = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
104
+ observations.is_a?(Array).should == true
105
+ observations.first.is_a?(String).should == true
106
+ end
107
+
108
+ it "coerces single values into arrays" do
109
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
110
+ observations = @generator.observations(@measures, @dimensions, @codes, newdata, @labels[0], "bacon")
111
+ observations.is_a?(Array).should == true
112
+ observations.first.is_a?(String).should == true
113
+ end
114
+ end
115
+
116
+
117
+ context "under official integrity constraints" do
118
+ before(:all) do
119
+ @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
120
+ @checks = {}
121
+ Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
122
+ if file.split('.').last == 'rq'
123
+ @checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
124
+ end
125
+ end
126
+ end
127
+
128
+ it 'obeys IC-1, has a unique dataset for each observation' do
129
+ SPARQL.execute(@checks['1'], @graph).first.should be_nil
130
+ end
131
+
132
+ it 'obeys IC-2, has a unique data structure definition of each dataset' do
133
+ SPARQL.execute(@checks['2'], @graph).first.should be_nil
134
+ end
135
+
136
+ it 'obeys IC-3, has a measure property specified for each dataset' do
137
+ SPARQL.execute(@checks['3'], @graph).first.should be_nil
138
+ end
139
+
140
+ it 'obeys IC-4, specifies a range for all dimensions' do
141
+ SPARQL.execute(@checks['4'], @graph).first.should be_nil
142
+ end
143
+
144
+ it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
145
+ SPARQL.execute(@checks['5'], @graph).first.should be_nil
146
+ end
147
+
148
+ it 'obeys IC-11, has a value for each dimension in every observation' do
149
+ SPARQL.execute(@checks['11'], @graph).first.should be_nil
150
+ end
151
+
152
+ ## currently locks up. possible bug in SPARQL gem parsing?
153
+ ## works fine as a raw query
154
+ # it 'obeys IC-12, has no duplicate observations' do
155
+ # SPARQL.execute(@checks['12'], @graph).first.should be_nil
156
+ # end
157
+
158
+ it 'obeys IC-14, has a value for each measure in every observation' do
159
+ SPARQL.execute(@checks['14'], @graph).first.should be_nil
160
+ end
161
+
162
+ it 'obeys IC-19, all codes for each codeList are included' do
163
+ SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
164
+ ## second query for IC-19 uses property paths that aren't as easy to
165
+ ## convert to sparql 1.0, so for now I've left it out
166
+ # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,77 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::Dataset do
4
+ it "should use sio:has_value for unknown string types" do
5
+ pending("pending refactor dataset_for to handle raw remote files better")
6
+ turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
7
+ (turtle_string =~ /hasValue/).should_not be nil
8
+ # open('ttl.ttl','w'){|f| f.write turtle_string}
9
+ repo = RDF::Repository.new
10
+
11
+ f = Tempfile.new(['repo','.ttl'])
12
+ f.write(turtle_string)
13
+ f.close
14
+ repo.load(f.path, :format => :ttl)
15
+ f.unlink
16
+
17
+ repo.size.should > 0
18
+ end
19
+
20
+ it "can convert arff files" do
21
+ turtle_string = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
22
+ turtle_string.should == IO.read('spec/turtle/weather')
23
+ end
24
+
25
+ describe ".register_reader" do
26
+ it "can register readers to be used by Dataset.for" do
27
+ PubliSci::Dataset.reader_registry.clear
28
+ expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
29
+ PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
30
+ file = PubliSci::Dataset.for('resources/maf_example.maf')
31
+ str = IO.read(file)
32
+ File.delete(file.path)
33
+ str.size.should > 0
34
+ (str =~ /qb:Observation/).should_not be nil
35
+ end
36
+ end
37
+
38
+ context 'with a csv file' do
39
+ before(:all) do
40
+ @file = File.dirname(__FILE__) + '/csv/bacon.csv'
41
+ end
42
+
43
+ it "should load with no prompts if all details are specified" do
44
+ turtle_string = PubliSci::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
45
+ (turtle_string =~ /qb:Observation/).should_not be nil
46
+ end
47
+
48
+ it "will download remote files" do
49
+ turtle_string = PubliSci::Dataset.for('https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv',false)
50
+ (turtle_string =~ /prop:pricerange/).should_not be nil
51
+ (turtle_string =~ /prop:producer/).should_not be nil
52
+ end
53
+
54
+ it "will request user input if not provided" do
55
+ gen = PubliSci::Readers::CSV.new
56
+ gen.stub(:gets).and_return('pricerange,producer')
57
+ gen.stub(:puts)
58
+ turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
59
+ (turtle_string =~ /prop:pricerange/).should_not be nil
60
+ (turtle_string =~ /prop:producer/).should_not be nil
61
+ end
62
+
63
+ it "will try to guess if told not to be interactive" do
64
+ turtle_string = PubliSci::Dataset.for(@file,false)
65
+ (turtle_string =~ /prop:pricerange/).should_not be nil
66
+ (turtle_string =~ /prop:producer/).should_not be nil
67
+ end
68
+
69
+ it "will attempt to load remote file if given URI" do
70
+ loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
71
+ turtle_string = PubliSci::Dataset.for(loc,false)
72
+ (turtle_string =~ /prop:pricerange/).should_not be nil
73
+ (turtle_string =~ /prop:producer/).should_not be nil
74
+ end
75
+ end
76
+
77
+ end