publisci 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +36 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +51 -0
  8. data/README.rdoc +48 -0
  9. data/Rakefile +68 -0
  10. data/bin/bio-publisci +106 -0
  11. data/bin/bio-publisci-server +50 -0
  12. data/examples/bio-band_integration.rb +9 -0
  13. data/examples/no_magic.prov +58 -0
  14. data/examples/no_magic.rb +58 -0
  15. data/examples/orm.prov +48 -0
  16. data/examples/primer-full.prov +120 -0
  17. data/examples/primer.prov +66 -0
  18. data/examples/prov_dsl.prov +85 -0
  19. data/examples/safe_gen.rb +7 -0
  20. data/examples/visualization/primer.prov +66 -0
  21. data/examples/visualization/prov_viz.rb +140 -0
  22. data/examples/visualization/viz.rb +35 -0
  23. data/features/create_generator.feature +21 -0
  24. data/features/integration.feature +12 -0
  25. data/features/integration_steps.rb +10 -0
  26. data/features/metadata.feature +37 -0
  27. data/features/metadata_steps.rb +40 -0
  28. data/features/orm.feature +60 -0
  29. data/features/orm_steps.rb +74 -0
  30. data/features/prov_dsl.feature +14 -0
  31. data/features/prov_dsl_steps.rb +11 -0
  32. data/features/reader.feature +25 -0
  33. data/features/reader_steps.rb +61 -0
  34. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  35. data/features/store.feature +27 -0
  36. data/features/store_steps.rb +42 -0
  37. data/features/support/env.rb +13 -0
  38. data/features/writer.feature +14 -0
  39. data/features/writer_steps.rb +24 -0
  40. data/lib/bio-publisci.rb +64 -0
  41. data/lib/bio-publisci/analyzer.rb +57 -0
  42. data/lib/bio-publisci/datacube_model.rb +111 -0
  43. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
  44. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  45. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  46. data/lib/bio-publisci/dataset/data_cube.rb +418 -0
  47. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  48. data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
  49. data/lib/bio-publisci/dataset/interactive.rb +72 -0
  50. data/lib/bio-publisci/dsl/config.rb +34 -0
  51. data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
  52. data/lib/bio-publisci/dsl/dsl.rb +72 -0
  53. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  54. data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
  55. data/lib/bio-publisci/metadata/generator.rb +323 -0
  56. data/lib/bio-publisci/metadata/metadata.rb +5 -0
  57. data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
  58. data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
  59. data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
  60. data/lib/bio-publisci/metadata/prov/association.rb +107 -0
  61. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  62. data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
  63. data/lib/bio-publisci/metadata/prov/element.rb +120 -0
  64. data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
  65. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
  66. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  67. data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
  68. data/lib/bio-publisci/metadata/prov/role.rb +40 -0
  69. data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
  70. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  71. data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
  72. data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
  73. data/lib/bio-publisci/mixins/registry.rb +27 -0
  74. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  75. data/lib/bio-publisci/output.rb +27 -0
  76. data/lib/bio-publisci/parser.rb +266 -0
  77. data/lib/bio-publisci/post_processor.rb +95 -0
  78. data/lib/bio-publisci/query/query_helper.rb +123 -0
  79. data/lib/bio-publisci/r_client.rb +54 -0
  80. data/lib/bio-publisci/readers/arff.rb +49 -0
  81. data/lib/bio-publisci/readers/base.rb +57 -0
  82. data/lib/bio-publisci/readers/csv.rb +88 -0
  83. data/lib/bio-publisci/readers/dataframe.rb +67 -0
  84. data/lib/bio-publisci/readers/maf.rb +199 -0
  85. data/lib/bio-publisci/readers/r_cross.rb +112 -0
  86. data/lib/bio-publisci/readers/r_matrix.rb +176 -0
  87. data/lib/bio-publisci/store.rb +56 -0
  88. data/lib/bio-publisci/writers/arff.rb +91 -0
  89. data/lib/bio-publisci/writers/base.rb +93 -0
  90. data/lib/bio-publisci/writers/csv.rb +31 -0
  91. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  92. data/lib/bio-publisci/writers/json.rb +18 -0
  93. data/lib/r2rdf.rb +226 -0
  94. data/lib/template_bak.rb +12 -0
  95. data/lib/template_bak/publisci.rb +3 -0
  96. data/lib/vocabs/cc.rb +18 -0
  97. data/lib/vocabs/cert.rb +13 -0
  98. data/lib/vocabs/dc.rb +63 -0
  99. data/lib/vocabs/dc11.rb +23 -0
  100. data/lib/vocabs/doap.rb +45 -0
  101. data/lib/vocabs/exif.rb +168 -0
  102. data/lib/vocabs/foaf.rb +69 -0
  103. data/lib/vocabs/geo.rb +13 -0
  104. data/lib/vocabs/http.rb +26 -0
  105. data/lib/vocabs/ma.rb +78 -0
  106. data/lib/vocabs/owl.rb +59 -0
  107. data/lib/vocabs/rdfs.rb +17 -0
  108. data/lib/vocabs/rsa.rb +12 -0
  109. data/lib/vocabs/rss.rb +14 -0
  110. data/lib/vocabs/sioc.rb +93 -0
  111. data/lib/vocabs/skos.rb +36 -0
  112. data/lib/vocabs/wot.rb +21 -0
  113. data/lib/vocabs/xhtml.rb +9 -0
  114. data/lib/vocabs/xsd.rb +58 -0
  115. data/resources/maf_example.maf +10 -0
  116. data/resources/maf_rdf.ttl +1173 -0
  117. data/resources/primer.ttl +38 -0
  118. data/resources/queries/code_resources.rq +10 -0
  119. data/resources/queries/codes.rq +18 -0
  120. data/resources/queries/dataset.rq +7 -0
  121. data/resources/queries/dimension_ranges.rq +8 -0
  122. data/resources/queries/dimensions.rq +12 -0
  123. data/resources/queries/gene.rq +16 -0
  124. data/resources/queries/hugo_to_ensembl.rq +7 -0
  125. data/resources/queries/maf_column.rq +26 -0
  126. data/resources/queries/measures.rq +12 -0
  127. data/resources/queries/observation_labels.rq +8 -0
  128. data/resources/queries/observations.rq +13 -0
  129. data/resources/queries/patient.rq +11 -0
  130. data/resources/queries/patient_list.rq +11 -0
  131. data/resources/queries/patients_with_mutation.rq +18 -0
  132. data/resources/queries/properties.rq +8 -0
  133. data/resources/queries/test.rq +3 -0
  134. data/resources/weather.numeric.arff +28 -0
  135. data/scripts/get_gene_lengths.rb +50 -0
  136. data/scripts/islet_mlratio.rb +6 -0
  137. data/scripts/scan_islet.rb +6 -0
  138. data/scripts/update_reference.rb +25 -0
  139. data/server/helpers.rb +215 -0
  140. data/server/public/src-min-noconflict/LICENSE +24 -0
  141. data/server/public/src-min-noconflict/ace.js +11 -0
  142. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  143. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  144. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  145. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  146. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  147. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  148. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  149. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  150. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  151. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  152. data/server/public/src-min-noconflict/ext-split.js +1 -0
  153. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  154. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  155. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  156. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  157. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  158. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  159. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  160. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  161. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  162. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  163. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  164. data/server/public/src-min-noconflict/worker-css.js +1 -0
  165. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  166. data/server/public/src-min-noconflict/worker-json.js +1 -0
  167. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  168. data/server/public/src-min-noconflict/worker-php.js +1 -0
  169. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  170. data/server/routes.rb +123 -0
  171. data/server/views/dsl.haml +65 -0
  172. data/server/views/dump.haml +3 -0
  173. data/server/views/import.haml +35 -0
  174. data/server/views/new_repository.haml +25 -0
  175. data/server/views/query.haml +28 -0
  176. data/server/views/repository.haml +25 -0
  177. data/spec/ORM/data_cube_orm_spec.rb +33 -0
  178. data/spec/ORM/prov_model_spec.rb +72 -0
  179. data/spec/analyzer_spec.rb +36 -0
  180. data/spec/bnode_spec.rb +66 -0
  181. data/spec/csv/bacon.csv +4 -0
  182. data/spec/csv/moar_bacon.csv +11 -0
  183. data/spec/data_cube_spec.rb +169 -0
  184. data/spec/dataset_for_spec.rb +77 -0
  185. data/spec/dsl_spec.rb +134 -0
  186. data/spec/generators/csv_spec.rb +44 -0
  187. data/spec/generators/dataframe_spec.rb +44 -0
  188. data/spec/generators/maf_spec.rb +40 -0
  189. data/spec/generators/r_cross_spec.rb +51 -0
  190. data/spec/generators/r_matrix_spec.rb +44 -0
  191. data/spec/length_lookup_spec.rb +0 -0
  192. data/spec/maf_query_spec.rb +343 -0
  193. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  194. data/spec/prov/activity_spec.rb +74 -0
  195. data/spec/prov/agent_spec.rb +54 -0
  196. data/spec/prov/association_spec.rb +55 -0
  197. data/spec/prov/config_spec.rb +28 -0
  198. data/spec/prov/derivation_spec.rb +30 -0
  199. data/spec/prov/entity_spec.rb +52 -0
  200. data/spec/prov/role_spec.rb +94 -0
  201. data/spec/prov/usage_spec.rb +98 -0
  202. data/spec/queries/integrity/1.rq +21 -0
  203. data/spec/queries/integrity/11.rq +29 -0
  204. data/spec/queries/integrity/12.rq +37 -0
  205. data/spec/queries/integrity/14.rq +25 -0
  206. data/spec/queries/integrity/19_1.rq +21 -0
  207. data/spec/queries/integrity/19_2.rq +15 -0
  208. data/spec/queries/integrity/2.rq +22 -0
  209. data/spec/queries/integrity/3.rq +19 -0
  210. data/spec/queries/integrity/4.rq +13 -0
  211. data/spec/queries/integrity/5.rq +14 -0
  212. data/spec/r_builder_spec.rb +33 -0
  213. data/spec/resource/.RData +0 -0
  214. data/spec/resource/example.Rhistory +3 -0
  215. data/spec/spec_helper.rb +17 -0
  216. data/spec/turtle/bacon +147 -0
  217. data/spec/turtle/reference +2064 -0
  218. data/spec/turtle/weather +275 -0
  219. data/spec/writer_spec.rb +75 -0
  220. metadata +589 -0
@@ -0,0 +1,33 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::ORM do
4
+
5
+ it "should load and save a turtle file without loss of information in old ORM" do
6
+ pending("pending rewrite of abbreviaton method to account for base_url")
7
+ ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
8
+ cube = PubliSci::DataSet::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
9
+ cube.abbreviate_known(cube.to_n3).should == ref
10
+ # cube.to_n3.should == ref
11
+ end
12
+
13
+ it "should load properties for Observation object" do
14
+ ev = PubliSci::DSL::Instance.new
15
+ r = ev.instance_eval do
16
+ data do
17
+ object 'spec/csv/bacon.csv'
18
+ end
19
+
20
+ to_repository
21
+ end
22
+ Spira.add_repository :default, r
23
+
24
+ PubliSci::ORM::Observation.count.should > 0
25
+
26
+ PubliSci::ORM::Observation.first.load_properties
27
+ fi = PubliSci::ORM::Observation.first
28
+ fi.chunkiness.should_not be nil
29
+ fi.deliciousness.should_not be nil
30
+
31
+ end
32
+
33
+ end
@@ -0,0 +1,72 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+ include PubliSci::Prov::DSL
3
+ # include PubliSci::Prov
4
+
5
+ describe PubliSci::Prov::Model do
6
+ it "can be loaded from" do
7
+ ev = PubliSci::Prov::DSL::Instance.new
8
+ r = ev.instance_eval do
9
+ entity :datathing
10
+
11
+ activity :process, generated: :datathing
12
+
13
+ to_repository
14
+ end
15
+
16
+ Spira.add_repository :default, r
17
+ PubliSci::Prov::Model::Entity.first.should_not be nil
18
+ end
19
+
20
+ context "has useful methods built in to models" do
21
+ it "can reverse chain associated activities for agents" do
22
+ ev = PubliSci::Prov::DSL::Instance.new
23
+
24
+ ag = ev.instance_eval do
25
+ agent :some_dudette
26
+ end
27
+
28
+ act = ev.instance_eval do
29
+ entity :datathing
30
+
31
+ activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
32
+ end
33
+
34
+ r = ev.instance_eval do
35
+ to_repository
36
+ end
37
+
38
+
39
+ # z= ev.instance_eval do
40
+ # generate_n3
41
+ # end
42
+
43
+ Spira.add_repository :default, r
44
+ model_agent = PubliSci::Prov::Model::Agent.first
45
+ ag.subject.should == model_agent.subject
46
+ acts = model_agent.activities
47
+ acts.first.subject.should == act.subject
48
+ end
49
+
50
+ it "can dump all types for Entities" do
51
+ ev = PubliSci::Prov::DSL::Instance.new
52
+
53
+ qb = RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
54
+
55
+ r = ev.instance_eval do
56
+ agent :some_dudette
57
+
58
+ entity :datathing do
59
+ has RDF.type, qb.DataSet
60
+ end
61
+
62
+ activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
63
+
64
+ to_repository
65
+ end
66
+
67
+ Spira.add_repository :default, r
68
+ PubliSci::Prov::Model::Entity.first.all_types.should == %w{http://www.w3.org/ns/prov#Entity http://purl.org/linked-data/cube#DataSet}
69
+ PubliSci::Prov::Model::Entity.first.has_data?.should == true
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,36 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::Analyzer do
4
+ class Ana
5
+ include PubliSci::Analyzer
6
+ end
7
+
8
+ before(:all) do
9
+ @analyzer = Ana.new
10
+
11
+ @measures = ['chunkiness','deliciousness']
12
+ @dimensions = ['producer', 'pricerange']
13
+ @labels = %w(hormel newskies whys)
14
+ @data =
15
+ {
16
+ "producer" => ["hormel","newskies", "whys"],
17
+ "pricerange" => ["low", "medium", "nonexistant"],
18
+ "chunkiness"=> [1, 6, 9001],
19
+ "deliciousness"=> [1, 9, 6]
20
+ }
21
+ end
22
+
23
+ it "should run a basic validation" do
24
+ newdata = []
25
+
26
+ @data.keys.size.times{|i|
27
+ obs = {}
28
+ @data.map{|k,v|
29
+ obs[k] = v[i]
30
+ }
31
+ newdata << obs
32
+ }
33
+
34
+ @analyzer.check_integrity(newdata, @measures, @dimensions)
35
+ end
36
+ end
@@ -0,0 +1,66 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+
4
+ describe "DataCube Node Generation" do
5
+
6
+ context "with Plain Old Ruby objects" do
7
+ #define a temporary class to use module methods
8
+ before(:all) do
9
+ class Gen
10
+ include PubliSci::Dataset::DataCube
11
+ end
12
+
13
+ @generator = Gen.new
14
+ @measures = ['chunkiness','deliciousness']
15
+ @dimensions = ['producer', 'pricerange']
16
+ @codes = @dimensions #all dimensions coded for the tests
17
+ @labels = %w(hormel newskies whys)
18
+ @data =
19
+ {
20
+ "producer" => ["hormel","newskies", "whys"],
21
+ "pricerange" => ["low", "medium", "nonexistant"],
22
+ "chunkiness"=> [1, 6, 9001],
23
+ "deliciousness"=> [1, 9, 6]
24
+ }
25
+ end
26
+
27
+ it "represents nested arrays using blank nodes" do
28
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
29
+ newdata.keys.each{|k| newdata[k] =[[["a", "rdf:Property"],["<http://semanticscience.org/resource/SIO_000300>", newdata[k]]]] }
30
+ observations = @generator.observations(@measures, [], [], newdata, @labels[0], "bacon")
31
+ observations.is_a?(Array).should == true
32
+ # puts observations.first.class
33
+ observations.first.is_a?(String).should == true
34
+ # puts observations
35
+ # observations.first[%r{\[ a rdf:Property ;\n<http://semanticscience.org/resource/SIO_000300> 1 \n \]}].should_not be nil
36
+ end
37
+
38
+ it "can nest arrays to some depth" do
39
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
40
+ newdata.keys.each{|k|
41
+ if ["producer","chunkiness"].include? k
42
+ newdata[k] = [
43
+ [
44
+ ["a", "rdf:MacGuffin"] ,
45
+ [
46
+ "<http://semanticscience.org/resource/SIO_000300>",
47
+ [
48
+ ["a", "rdf:Absurdity"],
49
+ [ 'rdf:value', newdata[k] ]
50
+ ]
51
+ ]
52
+ ]]
53
+
54
+ end
55
+ }
56
+
57
+ observations = @generator.observations(@measures, @dimensions, [], newdata, @labels[0], "bacon")
58
+ observations.is_a?(Array).should == true
59
+ observations.first.is_a?(String).should == true
60
+ # observations.first.count('[').should == 4
61
+ # observations.first.count(']').should == 4
62
+
63
+ # observations.first[%r{\[ a rdf:Property ;\n <http://semanticscience.org/resource/SIO_000300> 1 \]}].should_not be nil
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,4 @@
1
+ producer,pricerange,chunkiness,deliciousness
2
+ hormel,low,1,1
3
+ newskies,medium,6,9
4
+ whys,nonexistant,9001,6
@@ -0,0 +1,11 @@
1
+ producer,pricerange,chunkiness,deliciousness
2
+ hormel,low,1,1
3
+ oscar_mayer,low,6,3
4
+ newskies,medium,2,5
5
+ hormel,low,2,2
6
+ DArtagnan,medium,6,9
7
+ oscar_mayer,low,5,4
8
+ hormel,low,2,1
9
+ farmland,high,6,9
10
+ newskies,medium,6,9
11
+ whys,nonexistant,9001,6
@@ -0,0 +1,169 @@
1
+ # require_relative '../lib/r2rdf/data_cube.rb'
2
+ # require_relative '../lib/r2rdf/generators/dataframe.rb'
3
+ # require_relative '../lib/r2rdf/r_client.rb'
4
+ # require_relative '../lib/r2rdf/r_builder.rb'
5
+ # require_relative '../lib/r2rdf/generators/csv.rb'
6
+
7
+ require_relative '../lib/bio-publisci.rb'
8
+
9
+
10
+ describe PubliSci::Dataset::DataCube do
11
+
12
+ context "with Plain Old Ruby objects" do
13
+ #define a temporary class to use module methods
14
+ before(:all) do
15
+ class Gen
16
+ include PubliSci::Dataset::DataCube
17
+ end
18
+
19
+ @generator = Gen.new
20
+ @measures = ['chunkiness','deliciousness']
21
+ @dimensions = ['producer', 'pricerange']
22
+ @codes = @dimensions #all dimensions coded for the tests
23
+ @labels = %w(hormel newskies whys)
24
+ @data =
25
+ {
26
+ "producer" => ["hormel","newskies", "whys"],
27
+ "pricerange" => ["low", "medium", "nonexistant"],
28
+ "chunkiness"=> [1, 6, 9001],
29
+ "deliciousness"=> [1, 9, 6]
30
+ }
31
+ end
32
+
33
+ it "should have correct output according to the reference file" do
34
+
35
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
36
+ ref = IO.read(File.dirname(__FILE__) + '/turtle/bacon')
37
+ turtle_string.should == ref
38
+ end
39
+
40
+ context "with missing values" do
41
+
42
+ before(:all) do
43
+ @missing_data = Marshal.load(Marshal.dump(@data))
44
+ missingobs = {
45
+ "producer" => "missingbacon",
46
+ "pricerange" => "unknown",
47
+ "chunkiness"=> nil,
48
+ "deliciousness"=> nil,
49
+ }
50
+ missingobs.map{|k,v| @missing_data[k] << v}
51
+ end
52
+
53
+ it "skips observations with missing values by default" do
54
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
55
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
56
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
57
+ end
58
+
59
+ it "includes observations with missing values if flag is set" do
60
+ turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
61
+ turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
62
+ turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
63
+ end
64
+
65
+ end
66
+
67
+ it 'generates prefixes' do
68
+ prefixes = @generator.prefixes('bacon')
69
+ prefixes.is_a?(String).should == true
70
+ end
71
+
72
+ it 'generates data structure definition' do
73
+ dsd = @generator.data_structure_definition(@measures, @dimensions, @codes, "bacon")
74
+ dsd.is_a?(String).should == true
75
+ end
76
+
77
+ it 'generates dataset' do
78
+ dsd = @generator.dataset("bacon")
79
+ dsd.is_a?(String).should == true
80
+ end
81
+
82
+ it 'generates component specifications' do
83
+ components = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
84
+ components.is_a?(Array).should == true
85
+ components.first.is_a?(String).should == true
86
+ end
87
+
88
+ it 'generates dimension properties' do
89
+ dimensions = @generator.dimension_properties(@dimensions,@codes,"bacon")
90
+ dimensions.is_a?(Array).should == true
91
+ dimensions.first.is_a?(String).should == true
92
+ end
93
+
94
+ it 'generates measure properties' do
95
+ measures = @generator.measure_properties(@measures, "bacon")
96
+ measures.is_a?(Array).should == true
97
+ measures.first.is_a?(String).should == true
98
+ end
99
+
100
+ it 'generates observations' do
101
+ #measures, dimensions, codes, data, observation_labels, var, options={}
102
+
103
+ observations = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
104
+ observations.is_a?(Array).should == true
105
+ observations.first.is_a?(String).should == true
106
+ end
107
+
108
+ it "coerces single values into arrays" do
109
+ newdata = Hash[@data.map{|k,v| [k,v.first] }]
110
+ observations = @generator.observations(@measures, @dimensions, @codes, newdata, @labels[0], "bacon")
111
+ observations.is_a?(Array).should == true
112
+ observations.first.is_a?(String).should == true
113
+ end
114
+ end
115
+
116
+
117
+ context "under official integrity constraints" do
118
+ before(:all) do
119
+ @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
120
+ @checks = {}
121
+ Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
122
+ if file.split('.').last == 'rq'
123
+ @checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
124
+ end
125
+ end
126
+ end
127
+
128
+ it 'obeys IC-1, has a unique dataset for each observation' do
129
+ SPARQL.execute(@checks['1'], @graph).first.should be_nil
130
+ end
131
+
132
+ it 'obeys IC-2, has a unique data structure definition of each dataset' do
133
+ SPARQL.execute(@checks['2'], @graph).first.should be_nil
134
+ end
135
+
136
+ it 'obeys IC-3, has a measure property specified for each dataset' do
137
+ SPARQL.execute(@checks['3'], @graph).first.should be_nil
138
+ end
139
+
140
+ it 'obeys IC-4, specifies a range for all dimensions' do
141
+ SPARQL.execute(@checks['4'], @graph).first.should be_nil
142
+ end
143
+
144
+ it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
145
+ SPARQL.execute(@checks['5'], @graph).first.should be_nil
146
+ end
147
+
148
+ it 'obeys IC-11, has a value for each dimension in every observation' do
149
+ SPARQL.execute(@checks['11'], @graph).first.should be_nil
150
+ end
151
+
152
+ ## currently locks up. possible bug in SPARQL gem parsing?
153
+ ## works fine as a raw query
154
+ # it 'obeys IC-12, has no duplicate observations' do
155
+ # SPARQL.execute(@checks['12'], @graph).first.should be_nil
156
+ # end
157
+
158
+ it 'obeys IC-14, has a value for each measure in every observation' do
159
+ SPARQL.execute(@checks['14'], @graph).first.should be_nil
160
+ end
161
+
162
+ it 'obeys IC-19, all codes for each codeList are included' do
163
+ SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
164
+ ## second query for IC-19 uses property paths that aren't as easy to
165
+ ## convert to sparql 1.0, so for now I've left it out
166
+ # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,77 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::Dataset do
4
+ it "should use sio:has_value for unknown string types" do
5
+ pending("pending refactor dataset_for to handle raw remote files better")
6
+ turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
7
+ (turtle_string =~ /hasValue/).should_not be nil
8
+ # open('ttl.ttl','w'){|f| f.write turtle_string}
9
+ repo = RDF::Repository.new
10
+
11
+ f = Tempfile.new(['repo','.ttl'])
12
+ f.write(turtle_string)
13
+ f.close
14
+ repo.load(f.path, :format => :ttl)
15
+ f.unlink
16
+
17
+ repo.size.should > 0
18
+ end
19
+
20
+ it "can convert arff files" do
21
+ turtle_string = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
22
+ turtle_string.should == IO.read('spec/turtle/weather')
23
+ end
24
+
25
+ describe ".register_reader" do
26
+ it "can register readers to be used by Dataset.for" do
27
+ PubliSci::Dataset.reader_registry.clear
28
+ expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
29
+ PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
30
+ file = PubliSci::Dataset.for('resources/maf_example.maf')
31
+ str = IO.read(file)
32
+ File.delete(file.path)
33
+ str.size.should > 0
34
+ (str =~ /qb:Observation/).should_not be nil
35
+ end
36
+ end
37
+
38
+ context 'with a csv file' do
39
+ before(:all) do
40
+ @file = File.dirname(__FILE__) + '/csv/bacon.csv'
41
+ end
42
+
43
+ it "should load with no prompts if all details are specified" do
44
+ turtle_string = PubliSci::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
45
+ (turtle_string =~ /qb:Observation/).should_not be nil
46
+ end
47
+
48
+ it "will download remote files" do
49
+ turtle_string = PubliSci::Dataset.for('https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv',false)
50
+ (turtle_string =~ /prop:pricerange/).should_not be nil
51
+ (turtle_string =~ /prop:producer/).should_not be nil
52
+ end
53
+
54
+ it "will request user input if not provided" do
55
+ gen = PubliSci::Readers::CSV.new
56
+ gen.stub(:gets).and_return('pricerange,producer')
57
+ gen.stub(:puts)
58
+ turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
59
+ (turtle_string =~ /prop:pricerange/).should_not be nil
60
+ (turtle_string =~ /prop:producer/).should_not be nil
61
+ end
62
+
63
+ it "will try to guess if told not to be interactive" do
64
+ turtle_string = PubliSci::Dataset.for(@file,false)
65
+ (turtle_string =~ /prop:pricerange/).should_not be nil
66
+ (turtle_string =~ /prop:producer/).should_not be nil
67
+ end
68
+
69
+ it "will attempt to load remote file if given URI" do
70
+ loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
71
+ turtle_string = PubliSci::Dataset.for(loc,false)
72
+ (turtle_string =~ /prop:pricerange/).should_not be nil
73
+ (turtle_string =~ /prop:producer/).should_not be nil
74
+ end
75
+ end
76
+
77
+ end