bio-publisci 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module Writer
3
3
  module Dataframe
4
4
 
@@ -39,7 +39,7 @@ module R2RDF
39
39
  end
40
40
 
41
41
  class Builder
42
- include R2RDF::Writer::Dataframe
42
+ include PubliSci::Writer::Dataframe
43
43
 
44
44
 
45
45
  def from_turtle(turtle_file, connection, variable_in=nil, variable_out=nil, verbose=true, save=true)
@@ -51,7 +51,7 @@ module R2RDF
51
51
  repo = RDF::Repository.load(turtle_file)
52
52
  puts "loaded #{repo.size} statements into temporary repo" if verbose
53
53
  # connection = Rserve::Connection.new
54
- query = R2RDF::QueryHelper.new
54
+ query = PubliSci::QueryHelper.new
55
55
  rows = get_rownames(variable_in, query, repo)
56
56
  puts "frame has #{rows.size} rows" if verbose
57
57
 
@@ -69,8 +69,8 @@ module R2RDF
69
69
  end
70
70
  puts "connecting to endpoint at #{endpoint_url}" if verbose
71
71
  sparql = SPARQL::Client.new(endpoint_url)
72
- # client = R2RDF::Client.new
73
- query = R2RDF::QueryHelper.new
72
+ # client = PubliSci::Client.new
73
+ query = PubliSci::QueryHelper.new
74
74
 
75
75
  rows = query.get_ary(sparql.query(query.row_names(variable_in))).flatten
76
76
 
@@ -1,6 +1,6 @@
1
1
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
2
2
 
3
- gen = R2RDF::Reader::RMatrix.new
3
+ gen = PubliSci::Reader::RMatrix.new
4
4
  con = Rserve::Connection.new
5
5
  con.eval("load('#{ARGV[0] || './.RData'}')")
6
6
  gen.generate_n3(con, "islet.mlratio", "pheno", {measures: ["probe","individual","pheno"], no_labels: true})
@@ -1,6 +1,6 @@
1
1
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
2
2
 
3
- gen = R2RDF::Reader::RMatrix.new
3
+ gen = PubliSci::Reader::RMatrix.new
4
4
  con = Rserve::Connection.new
5
5
  con.eval("load('#{ARGV[0] || './.RData'}')")
6
6
  gen.generate_n3(con, "scan.islet", "scan", {measures: ["probe","marker","lod"], no_labels: true})
@@ -4,12 +4,12 @@ exit unless gets.chomp == 'y'
4
4
  puts "overwriting #{File.absolute_path(File.dirname(__FILE__) + '/../spec/turtle/bacon')}"
5
5
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
6
6
 
7
- gen = R2RDF::Reader::CSV.new
7
+ gen = PubliSci::Reader::CSV.new
8
8
  turtle_string = gen.generate_n3(File.dirname(__FILE__) + '/../spec/csv/bacon.csv','bacon',{dimensions:["producer","pricerange"], label_column:0})
9
9
  open(File.dirname(__FILE__) + '/../spec/turtle/bacon', 'w'){|f| f.write turtle_string}
10
10
 
11
11
  rcon = Rserve::Connection.new
12
- gen = R2RDF::Reader::Dataframe.new
12
+ gen = PubliSci::Reader::Dataframe.new
13
13
  rcon.void_eval <<-EOF
14
14
  library(qtl)
15
15
  data(listeria)
@@ -1,10 +1,10 @@
1
1
  require_relative '../../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::ORM::DataCube do
3
+ describe PubliSci::ORM::DataCube do
4
4
 
5
5
  it "should load and save a turtle file without loss of information" do
6
6
  ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
7
- cube = R2RDF::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
7
+ cube = PubliSci::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
8
8
  cube.abbreviate_known(cube.to_n3).should == ref
9
9
  # cube.to_n3.should == ref
10
10
  end
@@ -0,0 +1,19 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+ include PubliSci::Prov::DSL
3
+ # include PubliSci::Prov
4
+
5
+ describe PubliSci::Prov::Model do
6
+ it "can be loaded from" do
7
+ ev = PubliSci::Prov::DSL::Instance.new
8
+ r = ev.instance_eval do
9
+ entity :datathing
10
+
11
+ activity :process, generated: :datathing
12
+
13
+ to_repository
14
+ end
15
+
16
+ Spira.add_repository :default, r
17
+ PubliSci::Prov::Model::Entity.first.should_not be nil
18
+ end
19
+ end
@@ -1,25 +1,25 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Analyzer do
4
- class Ana
5
- include R2RDF::Analyzer
3
+ describe PubliSci::Analyzer do
4
+ class Ana
5
+ include PubliSci::Analyzer
6
6
  end
7
7
 
8
8
  before(:all) do
9
9
  @analyzer = Ana.new
10
-
10
+
11
11
  @measures = ['chunkiness','deliciousness']
12
12
  @dimensions = ['producer', 'pricerange']
13
13
  @labels = %w(hormel newskies whys)
14
- @data =
14
+ @data =
15
15
  {
16
16
  "producer" => ["hormel","newskies", "whys"],
17
17
  "pricerange" => ["low", "medium", "nonexistant"],
18
18
  "chunkiness"=> [1, 6, 9001],
19
- "deliciousness"=> [1, 9, 6]
19
+ "deliciousness"=> [1, 9, 6]
20
20
  }
21
21
  end
22
-
22
+
23
23
  it "should run a basic validation" do
24
24
  newdata = []
25
25
 
@@ -7,13 +7,13 @@
7
7
  require_relative '../lib/bio-publisci.rb'
8
8
 
9
9
 
10
- describe R2RDF::Dataset::DataCube do
10
+ describe PubliSci::Dataset::DataCube do
11
11
 
12
12
  context "with Plain Old Ruby objects" do
13
13
  #define a temporary class to use module methods
14
14
  before(:all) do
15
15
  class Gen
16
- include R2RDF::Dataset::DataCube
16
+ include PubliSci::Dataset::DataCube
17
17
  end
18
18
 
19
19
  @generator = Gen.new
@@ -21,20 +21,20 @@ describe R2RDF::Dataset::DataCube do
21
21
  @dimensions = ['producer', 'pricerange']
22
22
  @codes = @dimensions #all dimensions coded for the tests
23
23
  @labels = %w(hormel newskies whys)
24
- @data =
24
+ @data =
25
25
  {
26
26
  "producer" => ["hormel","newskies", "whys"],
27
27
  "pricerange" => ["low", "medium", "nonexistant"],
28
28
  "chunkiness"=> [1, 6, 9001],
29
- "deliciousness"=> [1, 9, 6]
29
+ "deliciousness"=> [1, 9, 6]
30
30
  }
31
31
  end
32
32
 
33
33
  it "should have correct output according to the reference file" do
34
-
34
+
35
35
  turtle_string = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
36
36
  ref = IO.read(File.dirname(__FILE__) + '/turtle/bacon')
37
- turtle_string.should == ref
37
+ turtle_string.should == ref
38
38
  end
39
39
 
40
40
  context "with missing values" do
@@ -45,12 +45,12 @@ describe R2RDF::Dataset::DataCube do
45
45
  "producer" => "missingbacon",
46
46
  "pricerange" => "unknown",
47
47
  "chunkiness"=> nil,
48
- "deliciousness"=> nil,
48
+ "deliciousness"=> nil,
49
49
  }
50
50
  missingobs.map{|k,v| @missing_data[k] << v}
51
51
  end
52
52
 
53
- it "skips observations with missing values by default" do
53
+ it "skips observations with missing values by default" do
54
54
  turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
55
55
  turtle_string[/.*obsmissingbacon.*\n/].should be nil
56
56
  end
@@ -78,7 +78,7 @@ describe R2RDF::Dataset::DataCube do
78
78
  end
79
79
 
80
80
  it 'generates component specifications' do
81
- components = @generator.component_specifications(@measures , @dimensions, "bacon")
81
+ components = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
82
82
  components.is_a?(Array).should == true
83
83
  components.first.is_a?(String).should == true
84
84
  end
@@ -97,16 +97,16 @@ describe R2RDF::Dataset::DataCube do
97
97
 
98
98
  it 'generates observations' do
99
99
  #measures, dimensions, codes, var, observation_labels, data, options={}
100
-
100
+
101
101
  observations = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
102
102
  observations.is_a?(Array).should == true
103
103
  observations.first.is_a?(String).should == true
104
104
  end
105
105
  end
106
-
106
+
107
107
  context "under official integrity constraints" do
108
108
  before(:all) do
109
- @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
109
+ @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
110
110
  @checks = {}
111
111
  Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
112
112
  if file.split('.').last == 'rq'
@@ -156,7 +156,7 @@ describe R2RDF::Dataset::DataCube do
156
156
  # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
157
157
  end
158
158
  end
159
-
159
+
160
160
 
161
161
  it "can set dimensions vs measures via hash" do
162
162
 
@@ -1,18 +1,18 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Dataset do
3
+ describe PubliSci::Dataset do
4
4
  context 'with a csv file' do
5
5
  before(:all) do
6
6
  @file = File.dirname(__FILE__) + '/csv/bacon.csv'
7
7
  end
8
8
 
9
9
  it "should load with no prompts if all details are specified" do
10
- turtle_string = R2RDF::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
10
+ turtle_string = PubliSci::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
11
11
  (turtle_string =~ /qb:Observation/).should_not be nil
12
12
  end
13
13
 
14
14
  it "will request user input if not provided" do
15
- gen = R2RDF::Reader::CSV.new
15
+ gen = PubliSci::Reader::CSV.new
16
16
  gen.stub(:gets).and_return('pricerange,producer')
17
17
  gen.stub(:puts)
18
18
  turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
@@ -21,7 +21,14 @@ describe R2RDF::Dataset do
21
21
  end
22
22
 
23
23
  it "will try to guess if told not to be interactive" do
24
- turtle_string = R2RDF::Dataset.for(@file,false)
24
+ turtle_string = PubliSci::Dataset.for(@file,false)
25
+ (turtle_string =~ /prop:pricerange/).should_not be nil
26
+ (turtle_string =~ /prop:producer/).should_not be nil
27
+ end
28
+
29
+ it "will attempt to load remote file if given URI" do
30
+ loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
31
+ turtle_string = PubliSci::Dataset.for(loc,false)
25
32
  (turtle_string =~ /prop:pricerange/).should_not be nil
26
33
  (turtle_string =~ /prop:producer/).should_not be nil
27
34
  end
data/spec/dsl_spec.rb ADDED
@@ -0,0 +1,90 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::DSL do
4
+ include PubliSci::DSL
5
+
6
+ before(:each) do
7
+ PubliSci::Prov.registry.clear
8
+ PubliSci::Metadata.registry.clear
9
+ PubliSci::Dataset.registry.clear
10
+ end
11
+
12
+ it "can generate dataset, metadata, and provenance when given a script" do
13
+
14
+ dat = data do
15
+ object 'spec/csv/bacon.csv'
16
+ end
17
+ met = metadata do
18
+ name "Will"
19
+ end
20
+ prv = provenance do
21
+ entity :a_thing
22
+ end
23
+
24
+ met.should_not be nil
25
+ prv.should_not be nil
26
+ dat.should_not be nil
27
+
28
+ generate_n3.size.should > 0
29
+ end
30
+
31
+ it "can generate dataset, metadata, and provenance when given a script" do
32
+ dat = data do
33
+ object 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
34
+ end
35
+ dat.should_not be nil
36
+ generate_n3.size.should > 0
37
+ end
38
+
39
+ it "can set generator options" do
40
+ dat = data do
41
+ object 'spec/csv/bacon.csv'
42
+ option :no_labels, true
43
+ end
44
+
45
+ str = generate_n3
46
+ str[/rdfs:label "\d"/].should == nil
47
+ end
48
+
49
+ it "can output to in-memory repository" do
50
+ dat = data do
51
+ object 'spec/csv/bacon.csv'
52
+ end
53
+
54
+ repo = to_repository
55
+ repo.is_a?(RDF::Repository).should be true
56
+ repo.size.should > 0
57
+ end
58
+
59
+ it "can output to 4store repository", no_travis: true do
60
+ configure do |cfg|
61
+ cfg.repository = :fourstore
62
+ end
63
+
64
+ dat = data do
65
+ object 'spec/csv/bacon.csv'
66
+ end
67
+
68
+ repo = RDF::FourStore::Repository.new('http://localhost:8080/')
69
+ old_size = repo.size
70
+ repo = to_repository
71
+ repo.is_a?(RDF::FourStore::Repository).should be true
72
+ repo.size.should > old_size
73
+ end
74
+
75
+ it "can output provenance to 4store", no_travis: true do
76
+ ev = PubliSci::Prov::DSL::Instance.new
77
+ str = IO.read('examples/primer-full.prov')
78
+ ev.instance_eval(str,'examples/primer-full.prov')
79
+ ev.instance_eval <<-EOF
80
+ configure do |cfg|
81
+ cfg.repository = :fourstore
82
+ end
83
+ EOF
84
+ repo = RDF::FourStore::Repository.new('http://localhost:8080/')
85
+ old_size = repo.size
86
+ repo = ev.to_repository
87
+ repo.is_a?(RDF::FourStore::Repository).should be true
88
+ repo.size.should > old_size
89
+ end
90
+ end
@@ -5,8 +5,8 @@ require_relative '../../lib/bio-publisci.rb'
5
5
  # require 'rdf/turtle'
6
6
  require 'tempfile'
7
7
 
8
- describe R2RDF::Reader::CSV do
9
-
8
+ describe PubliSci::Reader::CSV do
9
+
10
10
  def create_graph(turtle_string)
11
11
  f = Tempfile.new('graph')
12
12
  f.write(turtle_string)
@@ -16,8 +16,8 @@ describe R2RDF::Reader::CSV do
16
16
  graph
17
17
  end
18
18
 
19
- before(:each) do
20
- @generator = R2RDF::Reader::CSV.new
19
+ before(:each) do
20
+ @generator = PubliSci::Reader::CSV.new
21
21
  end
22
22
 
23
23
  context 'with reference CSV' do
@@ -1,7 +1,7 @@
1
1
  require_relative '../../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Reader::Dataframe do
4
-
3
+ describe PubliSci::Reader::Dataframe do
4
+
5
5
  def create_graph(turtle_string)
6
6
  f = Tempfile.new('graph')
7
7
  f.write(turtle_string)
@@ -12,9 +12,9 @@ describe R2RDF::Reader::Dataframe do
12
12
  end
13
13
 
14
14
  context "with r/qtl dataframe", no_travis: true do
15
- before(:all) do
15
+ before(:all) do
16
16
  @r = Rserve::Connection.new
17
- @generator = R2RDF::Reader::Dataframe.new
17
+ @generator = PubliSci::Reader::Dataframe.new
18
18
  @r.eval <<-EOF
19
19
  library(qtl)
20
20
  data(listeria)
@@ -28,7 +28,7 @@ EOF
28
28
  turtle = @generator.generate_n3(@rexp,'mr')
29
29
  turtle.is_a?(String).should be true
30
30
  end
31
-
31
+
32
32
  it "creates correct graph according to refrence file" do
33
33
  reference = IO.read(File.dirname(__FILE__) + '/../turtle/reference')
34
34
  @turtle.should eq reference
@@ -39,6 +39,6 @@ EOF
39
39
  end
40
40
  end
41
41
 
42
-
42
+
43
43
 
44
44
  end
@@ -2,7 +2,7 @@ require_relative '../../lib/bio-publisci.rb'
2
2
 
3
3
  require 'tempfile'
4
4
 
5
- describe R2RDF::Reader::RCross do
5
+ describe PubliSci::Reader::RCross do
6
6
 
7
7
  def create_graph(turtle_string)
8
8
  f = Tempfile.new('graph')
@@ -16,7 +16,7 @@ describe R2RDF::Reader::RCross do
16
16
  context "with reduced listeria cross", no_travis: true do
17
17
  before(:all) do
18
18
  @r = Rserve::Connection.new
19
- @generator = R2RDF::Reader::RCross.new
19
+ @generator = PubliSci::Reader::RCross.new
20
20
  @r.eval <<-EOF
21
21
  library(qtl)
22
22
  data(listeria)
@@ -6,7 +6,7 @@ require_relative '../../lib/bio-publisci.rb'
6
6
 
7
7
  require 'tempfile'
8
8
 
9
- describe R2RDF::Reader::RMatrix do
9
+ describe PubliSci::Reader::RMatrix do
10
10
 
11
11
  def create_graph(turtle_string)
12
12
  f = Tempfile.new('graph')
@@ -18,7 +18,7 @@ describe R2RDF::Reader::RMatrix do
18
18
  end
19
19
 
20
20
  before(:each) do
21
- @generator = R2RDF::Reader::RMatrix.new
21
+ @generator = PubliSci::Reader::RMatrix.new
22
22
  @connection = Rserve::Connection.new
23
23
  end
24
24