publisci 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +36 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +68 -0
- data/bin/bio-publisci +106 -0
- data/bin/bio-publisci-server +50 -0
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +58 -0
- data/examples/no_magic.rb +58 -0
- data/examples/orm.prov +48 -0
- data/examples/primer-full.prov +120 -0
- data/examples/primer.prov +66 -0
- data/examples/prov_dsl.prov +85 -0
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/create_generator.feature +21 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/metadata.feature +37 -0
- data/features/metadata_steps.rb +40 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +61 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +14 -0
- data/features/writer_steps.rb +24 -0
- data/lib/bio-publisci.rb +64 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/datacube_model.rb +111 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +418 -0
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
- data/lib/bio-publisci/dataset/interactive.rb +72 -0
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
- data/lib/bio-publisci/dsl/dsl.rb +72 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +5 -0
- data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
- data/lib/bio-publisci/metadata/prov/association.rb +107 -0
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
- data/lib/bio-publisci/metadata/prov/element.rb +120 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
- data/lib/bio-publisci/metadata/prov/role.rb +40 -0
- data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +266 -0
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +123 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +49 -0
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +88 -0
- data/lib/bio-publisci/readers/dataframe.rb +67 -0
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +112 -0
- data/lib/bio-publisci/readers/r_matrix.rb +176 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +91 -0
- data/lib/bio-publisci/writers/base.rb +93 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak.rb +12 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/codes.rq +18 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +12 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/measures.rq +12 -0
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/observations.rq +13 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/resources/queries/properties.rq +8 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +28 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +25 -0
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +33 -0
- data/spec/ORM/prov_model_spec.rb +72 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +169 -0
- data/spec/dataset_for_spec.rb +77 -0
- data/spec/dsl_spec.rb +134 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +44 -0
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +74 -0
- data/spec/prov/agent_spec.rb +54 -0
- data/spec/prov/association_spec.rb +55 -0
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +52 -0
- data/spec/prov/role_spec.rb +94 -0
- data/spec/prov/usage_spec.rb +98 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +147 -0
- data/spec/turtle/reference +2064 -0
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +75 -0
- metadata +589 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
describe PubliSci::ORM do
|
4
|
+
|
5
|
+
it "should load and save a turtle file without loss of information in old ORM" do
|
6
|
+
pending("pending rewrite of abbreviaton method to account for base_url")
|
7
|
+
ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
|
8
|
+
cube = PubliSci::DataSet::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
|
9
|
+
cube.abbreviate_known(cube.to_n3).should == ref
|
10
|
+
# cube.to_n3.should == ref
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should load properties for Observation object" do
|
14
|
+
ev = PubliSci::DSL::Instance.new
|
15
|
+
r = ev.instance_eval do
|
16
|
+
data do
|
17
|
+
object 'spec/csv/bacon.csv'
|
18
|
+
end
|
19
|
+
|
20
|
+
to_repository
|
21
|
+
end
|
22
|
+
Spira.add_repository :default, r
|
23
|
+
|
24
|
+
PubliSci::ORM::Observation.count.should > 0
|
25
|
+
|
26
|
+
PubliSci::ORM::Observation.first.load_properties
|
27
|
+
fi = PubliSci::ORM::Observation.first
|
28
|
+
fi.chunkiness.should_not be nil
|
29
|
+
fi.deliciousness.should_not be nil
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
2
|
+
include PubliSci::Prov::DSL
|
3
|
+
# include PubliSci::Prov
|
4
|
+
|
5
|
+
describe PubliSci::Prov::Model do
|
6
|
+
it "can be loaded from" do
|
7
|
+
ev = PubliSci::Prov::DSL::Instance.new
|
8
|
+
r = ev.instance_eval do
|
9
|
+
entity :datathing
|
10
|
+
|
11
|
+
activity :process, generated: :datathing
|
12
|
+
|
13
|
+
to_repository
|
14
|
+
end
|
15
|
+
|
16
|
+
Spira.add_repository :default, r
|
17
|
+
PubliSci::Prov::Model::Entity.first.should_not be nil
|
18
|
+
end
|
19
|
+
|
20
|
+
context "has useful methods built in to models" do
|
21
|
+
it "can reverse chain associated activities for agents" do
|
22
|
+
ev = PubliSci::Prov::DSL::Instance.new
|
23
|
+
|
24
|
+
ag = ev.instance_eval do
|
25
|
+
agent :some_dudette
|
26
|
+
end
|
27
|
+
|
28
|
+
act = ev.instance_eval do
|
29
|
+
entity :datathing
|
30
|
+
|
31
|
+
activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
|
32
|
+
end
|
33
|
+
|
34
|
+
r = ev.instance_eval do
|
35
|
+
to_repository
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
# z= ev.instance_eval do
|
40
|
+
# generate_n3
|
41
|
+
# end
|
42
|
+
|
43
|
+
Spira.add_repository :default, r
|
44
|
+
model_agent = PubliSci::Prov::Model::Agent.first
|
45
|
+
ag.subject.should == model_agent.subject
|
46
|
+
acts = model_agent.activities
|
47
|
+
acts.first.subject.should == act.subject
|
48
|
+
end
|
49
|
+
|
50
|
+
it "can dump all types for Entities" do
|
51
|
+
ev = PubliSci::Prov::DSL::Instance.new
|
52
|
+
|
53
|
+
qb = RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
|
54
|
+
|
55
|
+
r = ev.instance_eval do
|
56
|
+
agent :some_dudette
|
57
|
+
|
58
|
+
entity :datathing do
|
59
|
+
has RDF.type, qb.DataSet
|
60
|
+
end
|
61
|
+
|
62
|
+
activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
|
63
|
+
|
64
|
+
to_repository
|
65
|
+
end
|
66
|
+
|
67
|
+
Spira.add_repository :default, r
|
68
|
+
PubliSci::Prov::Model::Entity.first.all_types.should == %w{http://www.w3.org/ns/prov#Entity http://purl.org/linked-data/cube#DataSet}
|
69
|
+
PubliSci::Prov::Model::Entity.first.has_data?.should == true
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
describe PubliSci::Analyzer do
|
4
|
+
class Ana
|
5
|
+
include PubliSci::Analyzer
|
6
|
+
end
|
7
|
+
|
8
|
+
before(:all) do
|
9
|
+
@analyzer = Ana.new
|
10
|
+
|
11
|
+
@measures = ['chunkiness','deliciousness']
|
12
|
+
@dimensions = ['producer', 'pricerange']
|
13
|
+
@labels = %w(hormel newskies whys)
|
14
|
+
@data =
|
15
|
+
{
|
16
|
+
"producer" => ["hormel","newskies", "whys"],
|
17
|
+
"pricerange" => ["low", "medium", "nonexistant"],
|
18
|
+
"chunkiness"=> [1, 6, 9001],
|
19
|
+
"deliciousness"=> [1, 9, 6]
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should run a basic validation" do
|
24
|
+
newdata = []
|
25
|
+
|
26
|
+
@data.keys.size.times{|i|
|
27
|
+
obs = {}
|
28
|
+
@data.map{|k,v|
|
29
|
+
obs[k] = v[i]
|
30
|
+
}
|
31
|
+
newdata << obs
|
32
|
+
}
|
33
|
+
|
34
|
+
@analyzer.check_integrity(newdata, @measures, @dimensions)
|
35
|
+
end
|
36
|
+
end
|
data/spec/bnode_spec.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
|
4
|
+
describe "DataCube Node Generation" do
|
5
|
+
|
6
|
+
context "with Plain Old Ruby objects" do
|
7
|
+
#define a temporary class to use module methods
|
8
|
+
before(:all) do
|
9
|
+
class Gen
|
10
|
+
include PubliSci::Dataset::DataCube
|
11
|
+
end
|
12
|
+
|
13
|
+
@generator = Gen.new
|
14
|
+
@measures = ['chunkiness','deliciousness']
|
15
|
+
@dimensions = ['producer', 'pricerange']
|
16
|
+
@codes = @dimensions #all dimensions coded for the tests
|
17
|
+
@labels = %w(hormel newskies whys)
|
18
|
+
@data =
|
19
|
+
{
|
20
|
+
"producer" => ["hormel","newskies", "whys"],
|
21
|
+
"pricerange" => ["low", "medium", "nonexistant"],
|
22
|
+
"chunkiness"=> [1, 6, 9001],
|
23
|
+
"deliciousness"=> [1, 9, 6]
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
it "represents nested arrays using blank nodes" do
|
28
|
+
newdata = Hash[@data.map{|k,v| [k,v.first] }]
|
29
|
+
newdata.keys.each{|k| newdata[k] =[[["a", "rdf:Property"],["<http://semanticscience.org/resource/SIO_000300>", newdata[k]]]] }
|
30
|
+
observations = @generator.observations(@measures, [], [], newdata, @labels[0], "bacon")
|
31
|
+
observations.is_a?(Array).should == true
|
32
|
+
# puts observations.first.class
|
33
|
+
observations.first.is_a?(String).should == true
|
34
|
+
# puts observations
|
35
|
+
# observations.first[%r{\[ a rdf:Property ;\n<http://semanticscience.org/resource/SIO_000300> 1 \n \]}].should_not be nil
|
36
|
+
end
|
37
|
+
|
38
|
+
it "can nest arrays to some depth" do
|
39
|
+
newdata = Hash[@data.map{|k,v| [k,v.first] }]
|
40
|
+
newdata.keys.each{|k|
|
41
|
+
if ["producer","chunkiness"].include? k
|
42
|
+
newdata[k] = [
|
43
|
+
[
|
44
|
+
["a", "rdf:MacGuffin"] ,
|
45
|
+
[
|
46
|
+
"<http://semanticscience.org/resource/SIO_000300>",
|
47
|
+
[
|
48
|
+
["a", "rdf:Absurdity"],
|
49
|
+
[ 'rdf:value', newdata[k] ]
|
50
|
+
]
|
51
|
+
]
|
52
|
+
]]
|
53
|
+
|
54
|
+
end
|
55
|
+
}
|
56
|
+
|
57
|
+
observations = @generator.observations(@measures, @dimensions, [], newdata, @labels[0], "bacon")
|
58
|
+
observations.is_a?(Array).should == true
|
59
|
+
observations.first.is_a?(String).should == true
|
60
|
+
# observations.first.count('[').should == 4
|
61
|
+
# observations.first.count(']').should == 4
|
62
|
+
|
63
|
+
# observations.first[%r{\[ a rdf:Property ;\n <http://semanticscience.org/resource/SIO_000300> 1 \]}].should_not be nil
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/spec/csv/bacon.csv
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
# require_relative '../lib/r2rdf/data_cube.rb'
|
2
|
+
# require_relative '../lib/r2rdf/generators/dataframe.rb'
|
3
|
+
# require_relative '../lib/r2rdf/r_client.rb'
|
4
|
+
# require_relative '../lib/r2rdf/r_builder.rb'
|
5
|
+
# require_relative '../lib/r2rdf/generators/csv.rb'
|
6
|
+
|
7
|
+
require_relative '../lib/bio-publisci.rb'
|
8
|
+
|
9
|
+
|
10
|
+
describe PubliSci::Dataset::DataCube do
|
11
|
+
|
12
|
+
context "with Plain Old Ruby objects" do
|
13
|
+
#define a temporary class to use module methods
|
14
|
+
before(:all) do
|
15
|
+
class Gen
|
16
|
+
include PubliSci::Dataset::DataCube
|
17
|
+
end
|
18
|
+
|
19
|
+
@generator = Gen.new
|
20
|
+
@measures = ['chunkiness','deliciousness']
|
21
|
+
@dimensions = ['producer', 'pricerange']
|
22
|
+
@codes = @dimensions #all dimensions coded for the tests
|
23
|
+
@labels = %w(hormel newskies whys)
|
24
|
+
@data =
|
25
|
+
{
|
26
|
+
"producer" => ["hormel","newskies", "whys"],
|
27
|
+
"pricerange" => ["low", "medium", "nonexistant"],
|
28
|
+
"chunkiness"=> [1, 6, 9001],
|
29
|
+
"deliciousness"=> [1, 9, 6]
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should have correct output according to the reference file" do
|
34
|
+
|
35
|
+
turtle_string = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
|
36
|
+
ref = IO.read(File.dirname(__FILE__) + '/turtle/bacon')
|
37
|
+
turtle_string.should == ref
|
38
|
+
end
|
39
|
+
|
40
|
+
context "with missing values" do
|
41
|
+
|
42
|
+
before(:all) do
|
43
|
+
@missing_data = Marshal.load(Marshal.dump(@data))
|
44
|
+
missingobs = {
|
45
|
+
"producer" => "missingbacon",
|
46
|
+
"pricerange" => "unknown",
|
47
|
+
"chunkiness"=> nil,
|
48
|
+
"deliciousness"=> nil,
|
49
|
+
}
|
50
|
+
missingobs.map{|k,v| @missing_data[k] << v}
|
51
|
+
end
|
52
|
+
|
53
|
+
it "skips observations with missing values by default" do
|
54
|
+
turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
|
55
|
+
turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
|
56
|
+
turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
|
57
|
+
end
|
58
|
+
|
59
|
+
it "includes observations with missing values if flag is set" do
|
60
|
+
turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon',{encode_nulls: true})
|
61
|
+
turtle_string[/.*obsmissingbacon.*\n/].should_not be nil
|
62
|
+
turtle_string[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'generates prefixes' do
|
68
|
+
prefixes = @generator.prefixes('bacon')
|
69
|
+
prefixes.is_a?(String).should == true
|
70
|
+
end
|
71
|
+
|
72
|
+
it 'generates data structure definition' do
|
73
|
+
dsd = @generator.data_structure_definition(@measures, @dimensions, @codes, "bacon")
|
74
|
+
dsd.is_a?(String).should == true
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'generates dataset' do
|
78
|
+
dsd = @generator.dataset("bacon")
|
79
|
+
dsd.is_a?(String).should == true
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'generates component specifications' do
|
83
|
+
components = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
|
84
|
+
components.is_a?(Array).should == true
|
85
|
+
components.first.is_a?(String).should == true
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'generates dimension properties' do
|
89
|
+
dimensions = @generator.dimension_properties(@dimensions,@codes,"bacon")
|
90
|
+
dimensions.is_a?(Array).should == true
|
91
|
+
dimensions.first.is_a?(String).should == true
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'generates measure properties' do
|
95
|
+
measures = @generator.measure_properties(@measures, "bacon")
|
96
|
+
measures.is_a?(Array).should == true
|
97
|
+
measures.first.is_a?(String).should == true
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'generates observations' do
|
101
|
+
#measures, dimensions, codes, var, observation_labels, data, options={}
|
102
|
+
|
103
|
+
observations = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
|
104
|
+
observations.is_a?(Array).should == true
|
105
|
+
observations.first.is_a?(String).should == true
|
106
|
+
end
|
107
|
+
|
108
|
+
it "coerces single values into arrays" do
|
109
|
+
newdata = Hash[@data.map{|k,v| [k,v.first] }]
|
110
|
+
observations = @generator.observations(@measures, @dimensions, @codes, newdata, @labels[0], "bacon")
|
111
|
+
observations.is_a?(Array).should == true
|
112
|
+
observations.first.is_a?(String).should == true
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
context "under official integrity constraints" do
|
118
|
+
before(:all) do
|
119
|
+
@graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
|
120
|
+
@checks = {}
|
121
|
+
Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
|
122
|
+
if file.split('.').last == 'rq'
|
123
|
+
@checks[file.split('.').first] = IO.read(File.dirname(__FILE__) + '/queries/integrity/' + file)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'obeys IC-1, has a unique dataset for each observation' do
|
129
|
+
SPARQL.execute(@checks['1'], @graph).first.should be_nil
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'obeys IC-2, has a unique data structure definition of each dataset' do
|
133
|
+
SPARQL.execute(@checks['2'], @graph).first.should be_nil
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'obeys IC-3, has a measure property specified for each dataset' do
|
137
|
+
SPARQL.execute(@checks['3'], @graph).first.should be_nil
|
138
|
+
end
|
139
|
+
|
140
|
+
it 'obeys IC-4, specifies a range for all dimensions' do
|
141
|
+
SPARQL.execute(@checks['4'], @graph).first.should be_nil
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
|
145
|
+
SPARQL.execute(@checks['5'], @graph).first.should be_nil
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'obeys IC-11, has a value for each dimension in every observation' do
|
149
|
+
SPARQL.execute(@checks['11'], @graph).first.should be_nil
|
150
|
+
end
|
151
|
+
|
152
|
+
## currently locks up. possible bug in SPARQL gem parsing?
|
153
|
+
## works fine as a raw query
|
154
|
+
# it 'obeys IC-12, has do duplicate observations' do
|
155
|
+
# SPARQL.execute(@checks['12'], @graph).first.should be_nil
|
156
|
+
# end
|
157
|
+
|
158
|
+
it 'obeys IC-14, has a value for each measure in every observation' do
|
159
|
+
SPARQL.execute(@checks['14'], @graph).first.should be_nil
|
160
|
+
end
|
161
|
+
|
162
|
+
it 'obeys IC-19, all codes for each codeList are included' do
|
163
|
+
SPARQL.execute(@checks['19_1'], @graph).first.should be_nil
|
164
|
+
## second query for IC-19 uses property paths that aren't as easy to
|
165
|
+
## convert to sparql 1.0, so for now I've left it out
|
166
|
+
# SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
2
|
+
|
3
|
+
describe PubliSci::Dataset do
|
4
|
+
it "should use sio:has_value for unknown string types" do
|
5
|
+
pending("pending refactor dataset_for to handle raw remote files better")
|
6
|
+
turtle_string = PubliSci::Dataset.for('http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json',false)
|
7
|
+
(turtle_string =~ /hasValue/).should_not be nil
|
8
|
+
# open('ttl.ttl','w'){|f| f.write turtle_string}
|
9
|
+
repo = RDF::Repository.new
|
10
|
+
|
11
|
+
f = Tempfile.new(['repo','.ttl'])
|
12
|
+
f.write(turtle_string)
|
13
|
+
f.close
|
14
|
+
repo.load(f.path, :format => :ttl)
|
15
|
+
f.unlink
|
16
|
+
|
17
|
+
repo.size.should > 0
|
18
|
+
end
|
19
|
+
|
20
|
+
it "can convert arff files" do
|
21
|
+
turtle_string = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
|
22
|
+
turtle_string.should == IO.read('spec/turtle/weather')
|
23
|
+
end
|
24
|
+
|
25
|
+
describe ".register_reader" do
|
26
|
+
it "can register readers to be used by Dataset.for" do
|
27
|
+
PubliSci::Dataset.reader_registry.clear
|
28
|
+
expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
|
29
|
+
PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
|
30
|
+
file = PubliSci::Dataset.for('resources/maf_example.maf')
|
31
|
+
str = IO.read(file)
|
32
|
+
File.delete(file.path)
|
33
|
+
str.size.should > 0
|
34
|
+
(str =~ /qb:Observation/).should_not be nil
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
context 'with a csv file' do
|
39
|
+
before(:all) do
|
40
|
+
@file = File.dirname(__FILE__) + '/csv/bacon.csv'
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should load with no prompts if all details are specified" do
|
44
|
+
turtle_string = PubliSci::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
|
45
|
+
(turtle_string =~ /qb:Observation/).should_not be nil
|
46
|
+
end
|
47
|
+
|
48
|
+
it "will download remote files" do
|
49
|
+
turtle_string = PubliSci::Dataset.for('https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv',false)
|
50
|
+
(turtle_string =~ /prop:pricerange/).should_not be nil
|
51
|
+
(turtle_string =~ /prop:producer/).should_not be nil
|
52
|
+
end
|
53
|
+
|
54
|
+
it "will request user input if not provided" do
|
55
|
+
gen = PubliSci::Readers::CSV.new
|
56
|
+
gen.stub(:gets).and_return('pricerange,producer')
|
57
|
+
gen.stub(:puts)
|
58
|
+
turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
|
59
|
+
(turtle_string =~ /prop:pricerange/).should_not be nil
|
60
|
+
(turtle_string =~ /prop:producer/).should_not be nil
|
61
|
+
end
|
62
|
+
|
63
|
+
it "will try to guess if told not to be interactive" do
|
64
|
+
turtle_string = PubliSci::Dataset.for(@file,false)
|
65
|
+
(turtle_string =~ /prop:pricerange/).should_not be nil
|
66
|
+
(turtle_string =~ /prop:producer/).should_not be nil
|
67
|
+
end
|
68
|
+
|
69
|
+
it "will attempt to load remote file if given URI" do
|
70
|
+
loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
|
71
|
+
turtle_string = PubliSci::Dataset.for(loc,false)
|
72
|
+
(turtle_string =~ /prop:pricerange/).should_not be nil
|
73
|
+
(turtle_string =~ /prop:producer/).should_not be nil
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|