publisci 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +36 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +68 -0
- data/bin/bio-publisci +106 -0
- data/bin/bio-publisci-server +50 -0
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +58 -0
- data/examples/no_magic.rb +58 -0
- data/examples/orm.prov +48 -0
- data/examples/primer-full.prov +120 -0
- data/examples/primer.prov +66 -0
- data/examples/prov_dsl.prov +85 -0
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/create_generator.feature +21 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/metadata.feature +37 -0
- data/features/metadata_steps.rb +40 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +61 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +14 -0
- data/features/writer_steps.rb +24 -0
- data/lib/bio-publisci.rb +64 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/datacube_model.rb +111 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +418 -0
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
- data/lib/bio-publisci/dataset/interactive.rb +72 -0
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
- data/lib/bio-publisci/dsl/dsl.rb +72 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +5 -0
- data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
- data/lib/bio-publisci/metadata/prov/association.rb +107 -0
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
- data/lib/bio-publisci/metadata/prov/element.rb +120 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
- data/lib/bio-publisci/metadata/prov/role.rb +40 -0
- data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +266 -0
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +123 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +49 -0
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +88 -0
- data/lib/bio-publisci/readers/dataframe.rb +67 -0
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +112 -0
- data/lib/bio-publisci/readers/r_matrix.rb +176 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +91 -0
- data/lib/bio-publisci/writers/base.rb +93 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak.rb +12 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/codes.rq +18 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +12 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/measures.rq +12 -0
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/observations.rq +13 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/resources/queries/properties.rq +8 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +28 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +25 -0
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +33 -0
- data/spec/ORM/prov_model_spec.rb +72 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +169 -0
- data/spec/dataset_for_spec.rb +77 -0
- data/spec/dsl_spec.rb +134 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +44 -0
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +74 -0
- data/spec/prov/agent_spec.rb +54 -0
- data/spec/prov/association_spec.rb +55 -0
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +52 -0
- data/spec/prov/role_spec.rb +94 -0
- data/spec/prov/usage_spec.rb +98 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +147 -0
- data/spec/turtle/reference +2064 -0
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +75 -0
- metadata +589 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
|
2
|
+
|
|
3
|
+
# Specs for the ORM layer: a turtle round trip through the old ORM, and
# reading observations back through the Observation model.
describe PubliSci::ORM do

  # Loads the reference turtle fixture, re-serialises it and compares the
  # result with the original text. The example declares itself pending
  # before doing any of that work.
  # NOTE(review): this refers to `PubliSci::DataSet` while the other specs use
  # `PubliSci::Dataset` -- confirm which constant the library defines.
  it "should load and save a turtle file without loss of information in old ORM" do
    pending("pending rewrite of abbreviaton method to account for base_url")
    reference = IO.read("#{File.dirname(__FILE__)}/../turtle/bacon")
    load_options = { skip_metadata: true, generator_options: { label_column: 0 } }
    cube = PubliSci::DataSet::ORM::DataCube.load(reference, load_options)
    cube.abbreviate_known(cube.to_n3).should == reference
    # cube.to_n3.should == ref
  end

  # Builds a repository from the bacon CSV through the DSL, registers it as
  # Spira's default repository and checks that the first observation exposes
  # the measure accessors once `load_properties` has been called.
  it "should load properties for Observation object" do
    dsl = PubliSci::DSL::Instance.new
    repository = dsl.instance_eval do
      # Path is relative to the working directory the specs are run from.
      data { object 'spec/csv/bacon.csv' }

      to_repository
    end
    Spira.add_repository :default, repository

    PubliSci::ORM::Observation.count.should > 0

    # `load_properties` is invoked on one fetched instance; the assertions
    # below run against a freshly fetched one.
    PubliSci::ORM::Observation.first.load_properties
    first_observation = PubliSci::ORM::Observation.first
    first_observation.chunkiness.should_not be nil
    first_observation.deliciousness.should_not be nil

  end

end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
require_relative '../../lib/bio-publisci.rb'
|
|
2
|
+
include PubliSci::Prov::DSL
|
|
3
|
+
# include PubliSci::Prov
|
|
4
|
+
|
|
5
|
+
# Specs for the provenance models: each example builds a small provenance
# graph with the Prov DSL, registers the resulting repository with Spira and
# queries it through the model classes.
describe PubliSci::Prov::Model do
  it "can be loaded from" do
    dsl = PubliSci::Prov::DSL::Instance.new
    repository = dsl.instance_eval do
      entity :datathing

      activity :process, generated: :datathing

      to_repository
    end

    Spira.add_repository :default, repository
    PubliSci::Prov::Model::Entity.first.should_not be nil
  end

  context "has useful methods built in to models" do
    # The agent and the activity are declared in separate DSL calls so their
    # return values can be compared with what the models load back.
    it "can reverse chain associated activities for agents" do
      dsl = PubliSci::Prov::DSL::Instance.new

      declared_agent = dsl.instance_eval { agent :some_dudette }

      declared_activity = dsl.instance_eval do
        entity :datathing

        activity :process, generated: :datathing, wasAssociatedWith: :some_dudette
      end

      repository = dsl.instance_eval { to_repository }


      # z= ev.instance_eval do
      #   generate_n3
      # end

      Spira.add_repository :default, repository
      model_agent = PubliSci::Prov::Model::Agent.first
      declared_agent.subject.should == model_agent.subject
      linked_activities = model_agent.activities
      linked_activities.first.subject.should == declared_activity.subject
    end

    # Gives the entity an extra rdf:type (qb:DataSet) and expects both type
    # URIs to be reported, in this order.
    it "can dump all types for Entities" do
      dsl = PubliSci::Prov::DSL::Instance.new

      qb = RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))

      repository = dsl.instance_eval do
        agent :some_dudette

        entity(:datathing) { has RDF.type, qb.DataSet }

        activity :process, generated: :datathing, wasAssociatedWith: :some_dudette

        to_repository
      end

      Spira.add_repository :default, repository
      expected_types = [
        'http://www.w3.org/ns/prov#Entity',
        'http://purl.org/linked-data/cube#DataSet'
      ]
      PubliSci::Prov::Model::Entity.first.all_types.should == expected_types
      PubliSci::Prov::Model::Entity.first.has_data?.should == true
    end
  end
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
|
2
|
+
|
|
3
|
+
# Specs for the Analyzer mixin, exercised through a throwaway host class.
describe PubliSci::Analyzer do
  # Minimal class that mixes in Analyzer so its methods can be called on an
  # instance.
  class Ana
    include PubliSci::Analyzer
  end

  # Column-oriented fixture: each key is a column name and each value holds
  # that column's entries, one per observation.
  before(:all) do
    @analyzer = Ana.new

    @measures = ['chunkiness','deliciousness']
    @dimensions = ['producer', 'pricerange']
    @labels = %w(hormel newskies whys)
    @data =
    {
      "producer" => ["hormel","newskies", "whys"],
      "pricerange" => ["low", "medium", "nonexistant"],
      "chunkiness"=> [1, 6, 9001],
      "deliciousness"=> [1, 9, 6]
    }
  end

  # Pivots the column-oriented fixture into one hash per observation and
  # passes the result to `check_integrity`. There is no explicit expectation;
  # the example fails only if the call raises.
  it "should run a basic validation" do
    # The number of observations is the length of a column, not the number of
    # columns. Counting keys here used to append a fourth, all-nil observation.
    row_count = @data.values.first.size

    newdata = Array.new(row_count) do |row|
      Hash[@data.map { |column, values| [column, values[row]] }]
    end

    @analyzer.check_integrity(newdata, @measures, @dimensions)
  end
end
|
data/spec/bnode_spec.rb
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Specs for observation generation when a cell value is a nested array
# rather than a scalar. Both examples only check that an Array of Strings
# comes back; the stricter format assertions are commented out.
describe "DataCube Node Generation" do

  context "with Plain Old Ruby objects" do
    # A temporary class gives the examples an object on which to call the
    # DataCube module's methods.
    before(:all) do
      class Gen
        include PubliSci::Dataset::DataCube
      end

      @generator = Gen.new
      @measures = ['chunkiness','deliciousness']
      @dimensions = ['producer', 'pricerange']
      @codes = @dimensions #all dimensions coded for the tests
      @labels = %w(hormel newskies whys)
      @data =
      {
        "producer" => ["hormel","newskies", "whys"],
        "pricerange" => ["low", "medium", "nonexistant"],
        "chunkiness"=> [1, 6, 9001],
        "deliciousness"=> [1, 9, 6]
      }
    end

    # Every column's first value is wrapped in one level of
    # [[predicate, object], ...] pairs.
    it "represents nested arrays using blank nodes" do
      newdata = {}
      @data.each do |column, values|
        newdata[column] = [[
          ["a", "rdf:Property"],
          ["<http://semanticscience.org/resource/SIO_000300>", values.first]
        ]]
      end
      observations = @generator.observations(@measures, [], [], newdata, @labels[0], "bacon")
      observations.is_a?(Array).should == true
      # puts observations.first.class
      observations.first.is_a?(String).should == true
      # puts observations
      # observations.first[%r{\[ a rdf:Property ;\n<http://semanticscience.org/resource/SIO_000300> 1 \n \]}].should_not be nil
    end

    # Only "producer" and "chunkiness" get a two-level nested structure; the
    # other columns keep their plain first value.
    it "can nest arrays to some depth" do
      nested_columns = ["producer","chunkiness"]
      newdata = {}
      @data.each do |column, values|
        plain_value = values.first
        newdata[column] =
          if nested_columns.include? column
            [[
              ["a", "rdf:MacGuffin"],
              [
                "<http://semanticscience.org/resource/SIO_000300>",
                [
                  ["a", "rdf:Absurdity"],
                  ['rdf:value', plain_value]
                ]
              ]
            ]]
          else
            plain_value
          end
      end

      observations = @generator.observations(@measures, @dimensions, [], newdata, @labels[0], "bacon")
      observations.is_a?(Array).should == true
      observations.first.is_a?(String).should == true
      # observations.first.count('[').should == 4
      # observations.first.count(']').should == 4

      # observations.first[%r{\[ a rdf:Property ;\n <http://semanticscience.org/resource/SIO_000300> 1 \]}].should_not be nil
    end
  end
end
|
data/spec/csv/bacon.csv
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# require_relative '../lib/r2rdf/data_cube.rb'
|
|
2
|
+
# require_relative '../lib/r2rdf/generators/dataframe.rb'
|
|
3
|
+
# require_relative '../lib/r2rdf/r_client.rb'
|
|
4
|
+
# require_relative '../lib/r2rdf/r_builder.rb'
|
|
5
|
+
# require_relative '../lib/r2rdf/generators/csv.rb'
|
|
6
|
+
|
|
7
|
+
require_relative '../lib/bio-publisci.rb'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Specs for the DataCube generator module: output against reference turtle,
# handling of missing values, the individual generator pieces, and the
# integrity-constraint queries run against a reference graph.
describe PubliSci::Dataset::DataCube do

  context "with Plain Old Ruby objects" do
    # A temporary class gives the examples an object on which to call the
    # module's methods.
    before(:all) do
      class Gen
        include PubliSci::Dataset::DataCube
      end

      @generator = Gen.new
      @measures = ['chunkiness','deliciousness']
      @dimensions = ['producer', 'pricerange']
      @codes = @dimensions #all dimensions coded for the tests
      @labels = %w(hormel newskies whys)
      @data =
      {
        "producer" => ["hormel","newskies", "whys"],
        "pricerange" => ["low", "medium", "nonexistant"],
        "chunkiness"=> [1, 6, 9001],
        "deliciousness"=> [1, 9, 6]
      }
    end

    it "should have correct output according to the reference file" do

      generated = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
      expected = IO.read("#{File.dirname(__FILE__)}/turtle/bacon")
      generated.should == expected
    end

    context "with missing values" do

      # Deep-copies the shared fixture and appends one extra observation whose
      # measures are both nil.
      before(:all) do
        @missing_data = Marshal.load(Marshal.dump(@data))
        extra_observation = {
          "producer" => "missingbacon",
          "pricerange" => "unknown",
          "chunkiness"=> nil,
          "deliciousness"=> nil,
        }
        extra_observation.each do |column, value|
          @missing_data[column] << value
        end
      end

      # The observation is still mentioned in the output, but no chunkiness
      # property is expected for it.
      it "skips observations with missing values by default" do
        labels = @labels + ["missingbacon"]
        generated = @generator.generate(@measures, @dimensions, @codes, @missing_data, labels, 'bacon')
        generated[/.*obsmissingbacon.*\n/].should_not be nil
        generated[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should be nil
      end

      it "includes observations with missing values if flag is set" do
        labels = @labels + ["missingbacon"]
        generated = @generator.generate(@measures, @dimensions, @codes, @missing_data, labels, 'bacon',{encode_nulls: true})
        generated[/.*obsmissingbacon.*\n/].should_not be nil
        generated[/\S+obsmissingbacon.*prop:chunkiness.*\n\n/m].should_not be nil
      end

    end

    it 'generates prefixes' do
      result = @generator.prefixes('bacon')
      result.is_a?(String).should == true
    end

    it 'generates data structure definition' do
      result = @generator.data_structure_definition(@measures, @dimensions, @codes, "bacon")
      result.is_a?(String).should == true
    end

    it 'generates dataset' do
      result = @generator.dataset("bacon")
      result.is_a?(String).should == true
    end

    it 'generates component specifications' do
      result = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
      result.is_a?(Array).should == true
      result.first.is_a?(String).should == true
    end

    it 'generates dimension properties' do
      result = @generator.dimension_properties(@dimensions,@codes,"bacon")
      result.is_a?(Array).should == true
      result.first.is_a?(String).should == true
    end

    it 'generates measure properties' do
      result = @generator.measure_properties(@measures, "bacon")
      result.is_a?(Array).should == true
      result.first.is_a?(String).should == true
    end

    it 'generates observations' do
      # Argument order as called here: measures, dimensions, codes, data,
      # observation labels, dataset name.

      result = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
      result.is_a?(Array).should == true
      result.first.is_a?(String).should == true
    end

    # Passes a scalar per column and a single label instead of arrays.
    it "coerces single values into arrays" do
      single_row = Hash[@data.map { |column, values| [column, values.first] }]
      result = @generator.observations(@measures, @dimensions, @codes, single_row, @labels[0], "bacon")
      result.is_a?(Array).should == true
      result.first.is_a?(String).should == true
    end
  end


  context "under official integrity constraints" do
    # Loads the reference graph and every `.rq` file in queries/integrity,
    # keyed by the part of the file name before the first dot.
    before(:all) do
      @graph = RDF::Graph.load("#{File.dirname(__FILE__)}/turtle/reference", :format => :ttl)
      @checks = {}
      query_dir = "#{File.dirname(__FILE__)}/queries/integrity"
      Dir.foreach(query_dir) do |file|
        name_parts = file.split('.')
        next unless name_parts.last == 'rq'
        @checks[name_parts.first] = IO.read("#{query_dir}/#{file}")
      end
    end

    # Runs the named integrity query against the reference graph and returns
    # the first solution, which each example expects to be nil.
    def first_violation(check_name)
      SPARQL.execute(@checks[check_name], @graph).first
    end

    it 'obeys IC-1, has a unique dataset for each observation' do
      first_violation('1').should be_nil
    end

    it 'obeys IC-2, has a unique data structure definition of each dataset' do
      first_violation('2').should be_nil
    end

    it 'obeys IC-3, has a measure property specified for each dataset' do
      first_violation('3').should be_nil
    end

    it 'obeys IC-4, specifies a range for all dimensions' do
      first_violation('4').should be_nil
    end

    it 'obeys IC-5, every dimension with range skos:Concept must have a qb:codeList' do
      first_violation('5').should be_nil
    end

    it 'obeys IC-11, has a value for each dimension in every observation' do
      first_violation('11').should be_nil
    end

    ## IC-12 (no duplicate observations) is disabled: it currently locks up,
    ## possibly a bug in the SPARQL gem's parsing. It works fine as a raw query.
    # it 'obeys IC-12, has no duplicate observations' do
    #   SPARQL.execute(@checks['12'], @graph).first.should be_nil
    # end

    it 'obeys IC-14, has a value for each measure in every observation' do
      first_violation('14').should be_nil
    end

    it 'obeys IC-19, all codes for each codeList are included' do
      first_violation('19_1').should be_nil
      ## The second query for IC-19 uses property paths that aren't as easy to
      ## convert to SPARQL 1.0, so for now it is left out.
      # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
    end
  end
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
require_relative '../lib/bio-publisci.rb'
|
|
2
|
+
|
|
3
|
+
# Specs for `PubliSci::Dataset.for`, the entry point that turns a file or URL
# into turtle output, and for `register_reader`.
# Several examples fetch remote URLs, and the relative paths
# ('resources/...', 'spec/...') are resolved against the working directory.
describe PubliSci::Dataset do
  # Declares itself pending before any of the conversion or repository
  # loading steps run.
  it "should use sio:has_value for unknown string types" do
    pending("pending refactor dataset_for to handle raw remote files better")
    remote_json = 'http://www.biostat.wisc.edu/~kbroman/D3/cistrans/data/probe_data/probe497638.json'
    turtle_string = PubliSci::Dataset.for(remote_json,false)
    (turtle_string =~ /hasValue/).should_not be nil
    # open('ttl.ttl','w'){|f| f.write turtle_string}
    repo = RDF::Repository.new

    # Write the turtle to a temporary .ttl file so the repository can load it.
    scratch = Tempfile.new(['repo','.ttl'])
    scratch.write(turtle_string)
    scratch.close
    repo.load(scratch.path, :format => :ttl)
    scratch.unlink

    repo.size.should > 0
  end

  it "can convert arff files" do
    converted = PubliSci::Dataset.for('resources/weather.numeric.arff',false)
    converted.should == IO.read('spec/turtle/weather')
  end

  describe ".register_reader" do
    # Empties the reader registry, expects the .maf conversion to raise, then
    # registers the MAF reader and converts again. The second call's return
    # value is read as a file and deleted afterwards.
    it "can register readers to be used by Dataset.for" do
      PubliSci::Dataset.reader_registry.clear
      expect { PubliSci::Dataset.for('resources/maf_example.maf') }.to raise_error
      PubliSci::Dataset.register_reader('.maf',PubliSci::Readers::MAF)
      output_file = PubliSci::Dataset.for('resources/maf_example.maf')
      contents = IO.read(output_file)
      File.delete(output_file.path)
      contents.size.should > 0
      (contents =~ /qb:Observation/).should_not be nil
    end
  end

  context 'with a csv file' do
    before(:all) do
      @file = "#{File.dirname(__FILE__)}/csv/bacon.csv"
    end

    it "should load with no prompts if all details are specified" do
      options = {dimensions:["producer"],measures:["pricerange"]}
      turtle_string = PubliSci::Dataset.for(@file,options,false)
      (turtle_string =~ /qb:Observation/).should_not be nil
    end

    it "will download remote files" do
      remote_csv = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
      turtle_string = PubliSci::Dataset.for(remote_csv,false)
      (turtle_string =~ /prop:pricerange/).should_not be nil
      (turtle_string =~ /prop:producer/).should_not be nil
    end

    # `gets` and `puts` are stubbed on the reader itself, so `gets` returns
    # the canned answer and `puts` prints nothing.
    it "will request user input if not provided" do
      reader = PubliSci::Readers::CSV.new
      reader.stub(:gets).and_return('pricerange,producer')
      reader.stub(:puts)
      turtle_string = reader.automatic(@file,nil,{measures:["chunkiness"]})
      (turtle_string =~ /prop:pricerange/).should_not be nil
      (turtle_string =~ /prop:producer/).should_not be nil
    end

    it "will try to guess if told not to be interactive" do
      turtle_string = PubliSci::Dataset.for(@file,false)
      (turtle_string =~ /prop:pricerange/).should_not be nil
      (turtle_string =~ /prop:producer/).should_not be nil
    end

    it "will attempt to load remote file if given URI" do
      loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
      turtle_string = PubliSci::Dataset.for(loc,false)
      (turtle_string =~ /prop:pricerange/).should_not be nil
      (turtle_string =~ /prop:producer/).should_not be nil
    end
  end

end
|