bio-publisci 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module Writer
3
3
  module Dataframe
4
4
 
@@ -39,7 +39,7 @@ module R2RDF
39
39
  end
40
40
 
41
41
  class Builder
42
- include R2RDF::Writer::Dataframe
42
+ include PubliSci::Writer::Dataframe
43
43
 
44
44
 
45
45
  def from_turtle(turtle_file, connection, variable_in=nil, variable_out=nil, verbose=true, save=true)
@@ -51,7 +51,7 @@ module R2RDF
51
51
  repo = RDF::Repository.load(turtle_file)
52
52
  puts "loaded #{repo.size} statements into temporary repo" if verbose
53
53
  # connection = Rserve::Connection.new
54
- query = R2RDF::QueryHelper.new
54
+ query = PubliSci::QueryHelper.new
55
55
  rows = get_rownames(variable_in, query, repo)
56
56
  puts "frame has #{rows.size} rows" if verbose
57
57
 
@@ -69,8 +69,8 @@ module R2RDF
69
69
  end
70
70
  puts "connecting to endpoint at #{endpoint_url}" if verbose
71
71
  sparql = SPARQL::Client.new(endpoint_url)
72
- # client = R2RDF::Client.new
73
- query = R2RDF::QueryHelper.new
72
+ # client = PubliSci::Client.new
73
+ query = PubliSci::QueryHelper.new
74
74
 
75
75
  rows = query.get_ary(sparql.query(query.row_names(variable_in))).flatten
76
76
 
@@ -1,6 +1,6 @@
1
1
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
2
2
 
3
- gen = R2RDF::Reader::RMatrix.new
3
+ gen = PubliSci::Reader::RMatrix.new
4
4
  con = Rserve::Connection.new
5
5
  con.eval("load('#{ARGV[0] || './.RData'}')")
6
6
  gen.generate_n3(con, "islet.mlratio", "pheno", {measures: ["probe","individual","pheno"], no_labels: true})
@@ -1,6 +1,6 @@
1
1
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
2
2
 
3
- gen = R2RDF::Reader::RMatrix.new
3
+ gen = PubliSci::Reader::RMatrix.new
4
4
  con = Rserve::Connection.new
5
5
  con.eval("load('#{ARGV[0] || './.RData'}')")
6
6
  gen.generate_n3(con, "scan.islet", "scan", {measures: ["probe","marker","lod"], no_labels: true})
@@ -4,12 +4,12 @@ exit unless gets.chomp == 'y'
4
4
  puts "overwriting #{File.absolute_path(File.dirname(__FILE__) + '/../spec/turtle/bacon')}"
5
5
  load File.dirname(__FILE__) + '/../lib/bio-publisci.rb'
6
6
 
7
- gen = R2RDF::Reader::CSV.new
7
+ gen = PubliSci::Reader::CSV.new
8
8
  turtle_string = gen.generate_n3(File.dirname(__FILE__) + '/../spec/csv/bacon.csv','bacon',{dimensions:["producer","pricerange"], label_column:0})
9
9
  open(File.dirname(__FILE__) + '/../spec/turtle/bacon', 'w'){|f| f.write turtle_string}
10
10
 
11
11
  rcon = Rserve::Connection.new
12
- gen = R2RDF::Reader::Dataframe.new
12
+ gen = PubliSci::Reader::Dataframe.new
13
13
  rcon.void_eval <<-EOF
14
14
  library(qtl)
15
15
  data(listeria)
@@ -1,10 +1,10 @@
1
1
  require_relative '../../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::ORM::DataCube do
3
+ describe PubliSci::ORM::DataCube do
4
4
 
5
5
  it "should load and save a turtle file without loss of information" do
6
6
  ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
7
- cube = R2RDF::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
7
+ cube = PubliSci::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
8
8
  cube.abbreviate_known(cube.to_n3).should == ref
9
9
  # cube.to_n3.should == ref
10
10
  end
@@ -0,0 +1,19 @@
1
+ require_relative '../../lib/bio-publisci.rb'
2
+ include PubliSci::Prov::DSL
3
+ # include PubliSci::Prov
4
+
5
+ describe PubliSci::Prov::Model do
6
+ it "can be loaded from" do
7
+ ev = PubliSci::Prov::DSL::Instance.new
8
+ r = ev.instance_eval do
9
+ entity :datathing
10
+
11
+ activity :process, generated: :datathing
12
+
13
+ to_repository
14
+ end
15
+
16
+ Spira.add_repository :default, r
17
+ PubliSci::Prov::Model::Entity.first.should_not be nil
18
+ end
19
+ end
@@ -1,25 +1,25 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Analyzer do
4
- class Ana
5
- include R2RDF::Analyzer
3
+ describe PubliSci::Analyzer do
4
+ class Ana
5
+ include PubliSci::Analyzer
6
6
  end
7
7
 
8
8
  before(:all) do
9
9
  @analyzer = Ana.new
10
-
10
+
11
11
  @measures = ['chunkiness','deliciousness']
12
12
  @dimensions = ['producer', 'pricerange']
13
13
  @labels = %w(hormel newskies whys)
14
- @data =
14
+ @data =
15
15
  {
16
16
  "producer" => ["hormel","newskies", "whys"],
17
17
  "pricerange" => ["low", "medium", "nonexistant"],
18
18
  "chunkiness"=> [1, 6, 9001],
19
- "deliciousness"=> [1, 9, 6]
19
+ "deliciousness"=> [1, 9, 6]
20
20
  }
21
21
  end
22
-
22
+
23
23
  it "should run a basic validation" do
24
24
  newdata = []
25
25
 
@@ -7,13 +7,13 @@
7
7
  require_relative '../lib/bio-publisci.rb'
8
8
 
9
9
 
10
- describe R2RDF::Dataset::DataCube do
10
+ describe PubliSci::Dataset::DataCube do
11
11
 
12
12
  context "with Plain Old Ruby objects" do
13
13
  #define a temporary class to use module methods
14
14
  before(:all) do
15
15
  class Gen
16
- include R2RDF::Dataset::DataCube
16
+ include PubliSci::Dataset::DataCube
17
17
  end
18
18
 
19
19
  @generator = Gen.new
@@ -21,20 +21,20 @@ describe R2RDF::Dataset::DataCube do
21
21
  @dimensions = ['producer', 'pricerange']
22
22
  @codes = @dimensions #all dimensions coded for the tests
23
23
  @labels = %w(hormel newskies whys)
24
- @data =
24
+ @data =
25
25
  {
26
26
  "producer" => ["hormel","newskies", "whys"],
27
27
  "pricerange" => ["low", "medium", "nonexistant"],
28
28
  "chunkiness"=> [1, 6, 9001],
29
- "deliciousness"=> [1, 9, 6]
29
+ "deliciousness"=> [1, 9, 6]
30
30
  }
31
31
  end
32
32
 
33
33
  it "should have correct output according to the reference file" do
34
-
34
+
35
35
  turtle_string = @generator.generate(@measures, @dimensions, @codes, @data, @labels, 'bacon')
36
36
  ref = IO.read(File.dirname(__FILE__) + '/turtle/bacon')
37
- turtle_string.should == ref
37
+ turtle_string.should == ref
38
38
  end
39
39
 
40
40
  context "with missing values" do
@@ -45,12 +45,12 @@ describe R2RDF::Dataset::DataCube do
45
45
  "producer" => "missingbacon",
46
46
  "pricerange" => "unknown",
47
47
  "chunkiness"=> nil,
48
- "deliciousness"=> nil,
48
+ "deliciousness"=> nil,
49
49
  }
50
50
  missingobs.map{|k,v| @missing_data[k] << v}
51
51
  end
52
52
 
53
- it "skips observations with missing values by default" do
53
+ it "skips observations with missing values by default" do
54
54
  turtle_string = @generator.generate(@measures, @dimensions, @codes, @missing_data, @labels + ["missingbacon"], 'bacon')
55
55
  turtle_string[/.*obsmissingbacon.*\n/].should be nil
56
56
  end
@@ -78,7 +78,7 @@ describe R2RDF::Dataset::DataCube do
78
78
  end
79
79
 
80
80
  it 'generates component specifications' do
81
- components = @generator.component_specifications(@measures , @dimensions, "bacon")
81
+ components = @generator.component_specifications(@measures , @dimensions, @codes, "bacon")
82
82
  components.is_a?(Array).should == true
83
83
  components.first.is_a?(String).should == true
84
84
  end
@@ -97,16 +97,16 @@ describe R2RDF::Dataset::DataCube do
97
97
 
98
98
  it 'generates observations' do
99
99
  #measures, dimensions, codes, var, observation_labels, data, options={}
100
-
100
+
101
101
  observations = @generator.observations(@measures, @dimensions, @codes, @data, @labels, "bacon")
102
102
  observations.is_a?(Array).should == true
103
103
  observations.first.is_a?(String).should == true
104
104
  end
105
105
  end
106
-
106
+
107
107
  context "under official integrity constraints" do
108
108
  before(:all) do
109
- @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
109
+ @graph = RDF::Graph.load(File.dirname(__FILE__) + '/turtle/reference', :format => :ttl)
110
110
  @checks = {}
111
111
  Dir.foreach(File.dirname(__FILE__) + '/queries/integrity') do |file|
112
112
  if file.split('.').last == 'rq'
@@ -156,7 +156,7 @@ describe R2RDF::Dataset::DataCube do
156
156
  # SPARQL.execute(@checks['19_2'], @graph).first.should be_nil
157
157
  end
158
158
  end
159
-
159
+
160
160
 
161
161
  it "can set dimensions vs measures via hash" do
162
162
 
@@ -1,18 +1,18 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Dataset do
3
+ describe PubliSci::Dataset do
4
4
  context 'with a csv file' do
5
5
  before(:all) do
6
6
  @file = File.dirname(__FILE__) + '/csv/bacon.csv'
7
7
  end
8
8
 
9
9
  it "should load with no prompts if all details are specified" do
10
- turtle_string = R2RDF::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
10
+ turtle_string = PubliSci::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
11
11
  (turtle_string =~ /qb:Observation/).should_not be nil
12
12
  end
13
13
 
14
14
  it "will request user input if not provided" do
15
- gen = R2RDF::Reader::CSV.new
15
+ gen = PubliSci::Reader::CSV.new
16
16
  gen.stub(:gets).and_return('pricerange,producer')
17
17
  gen.stub(:puts)
18
18
  turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
@@ -21,7 +21,14 @@ describe R2RDF::Dataset do
21
21
  end
22
22
 
23
23
  it "will try to guess if told not to be interactive" do
24
- turtle_string = R2RDF::Dataset.for(@file,false)
24
+ turtle_string = PubliSci::Dataset.for(@file,false)
25
+ (turtle_string =~ /prop:pricerange/).should_not be nil
26
+ (turtle_string =~ /prop:producer/).should_not be nil
27
+ end
28
+
29
+ it "will attempt to load remote file if given URI" do
30
+ loc = 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
31
+ turtle_string = PubliSci::Dataset.for(loc,false)
25
32
  (turtle_string =~ /prop:pricerange/).should_not be nil
26
33
  (turtle_string =~ /prop:producer/).should_not be nil
27
34
  end
data/spec/dsl_spec.rb ADDED
@@ -0,0 +1,90 @@
1
+ require_relative '../lib/bio-publisci.rb'
2
+
3
+ describe PubliSci::DSL do
4
+ include PubliSci::DSL
5
+
6
+ before(:each) do
7
+ PubliSci::Prov.registry.clear
8
+ PubliSci::Metadata.registry.clear
9
+ PubliSci::Dataset.registry.clear
10
+ end
11
+
12
+ it "can generate dataset, metadata, and provenance when given a script" do
13
+
14
+ dat = data do
15
+ object 'spec/csv/bacon.csv'
16
+ end
17
+ met = metadata do
18
+ name "Will"
19
+ end
20
+ prv = provenance do
21
+ entity :a_thing
22
+ end
23
+
24
+ met.should_not be nil
25
+ prv.should_not be nil
26
+ dat.should_not be nil
27
+
28
+ generate_n3.size.should > 0
29
+ end
30
+
31
+ it "can generate dataset, metadata, and provenance when given a script" do
32
+ dat = data do
33
+ object 'https://raw.github.com/wstrinz/bioruby-publisci/master/spec/csv/bacon.csv'
34
+ end
35
+ dat.should_not be nil
36
+ generate_n3.size.should > 0
37
+ end
38
+
39
+ it "can set generator options" do
40
+ dat = data do
41
+ object 'spec/csv/bacon.csv'
42
+ option :no_labels, true
43
+ end
44
+
45
+ str = generate_n3
46
+ str[/rdfs:label "\d"/].should == nil
47
+ end
48
+
49
+ it "can output to in-memory repository" do
50
+ dat = data do
51
+ object 'spec/csv/bacon.csv'
52
+ end
53
+
54
+ repo = to_repository
55
+ repo.is_a?(RDF::Repository).should be true
56
+ repo.size.should > 0
57
+ end
58
+
59
+ it "can output to 4store repository", no_travis: true do
60
+ configure do |cfg|
61
+ cfg.repository = :fourstore
62
+ end
63
+
64
+ dat = data do
65
+ object 'spec/csv/bacon.csv'
66
+ end
67
+
68
+ repo = RDF::FourStore::Repository.new('http://localhost:8080/')
69
+ old_size = repo.size
70
+ repo = to_repository
71
+ repo.is_a?(RDF::FourStore::Repository).should be true
72
+ repo.size.should > old_size
73
+ end
74
+
75
+ it "can output provenance to 4store", no_travis: true do
76
+ ev = PubliSci::Prov::DSL::Instance.new
77
+ str = IO.read('examples/primer-full.prov')
78
+ ev.instance_eval(str,'examples/primer-full.prov')
79
+ ev.instance_eval <<-EOF
80
+ configure do |cfg|
81
+ cfg.repository = :fourstore
82
+ end
83
+ EOF
84
+ repo = RDF::FourStore::Repository.new('http://localhost:8080/')
85
+ old_size = repo.size
86
+ repo = ev.to_repository
87
+ repo.is_a?(RDF::FourStore::Repository).should be true
88
+ repo.size.should > old_size
89
+ end
90
+ end
@@ -5,8 +5,8 @@ require_relative '../../lib/bio-publisci.rb'
5
5
  # require 'rdf/turtle'
6
6
  require 'tempfile'
7
7
 
8
- describe R2RDF::Reader::CSV do
9
-
8
+ describe PubliSci::Reader::CSV do
9
+
10
10
  def create_graph(turtle_string)
11
11
  f = Tempfile.new('graph')
12
12
  f.write(turtle_string)
@@ -16,8 +16,8 @@ describe R2RDF::Reader::CSV do
16
16
  graph
17
17
  end
18
18
 
19
- before(:each) do
20
- @generator = R2RDF::Reader::CSV.new
19
+ before(:each) do
20
+ @generator = PubliSci::Reader::CSV.new
21
21
  end
22
22
 
23
23
  context 'with reference CSV' do
@@ -1,7 +1,7 @@
1
1
  require_relative '../../lib/bio-publisci.rb'
2
2
 
3
- describe R2RDF::Reader::Dataframe do
4
-
3
+ describe PubliSci::Reader::Dataframe do
4
+
5
5
  def create_graph(turtle_string)
6
6
  f = Tempfile.new('graph')
7
7
  f.write(turtle_string)
@@ -12,9 +12,9 @@ describe R2RDF::Reader::Dataframe do
12
12
  end
13
13
 
14
14
  context "with r/qtl dataframe", no_travis: true do
15
- before(:all) do
15
+ before(:all) do
16
16
  @r = Rserve::Connection.new
17
- @generator = R2RDF::Reader::Dataframe.new
17
+ @generator = PubliSci::Reader::Dataframe.new
18
18
  @r.eval <<-EOF
19
19
  library(qtl)
20
20
  data(listeria)
@@ -28,7 +28,7 @@ EOF
28
28
  turtle = @generator.generate_n3(@rexp,'mr')
29
29
  turtle.is_a?(String).should be true
30
30
  end
31
-
31
+
32
32
  it "creates correct graph according to refrence file" do
33
33
  reference = IO.read(File.dirname(__FILE__) + '/../turtle/reference')
34
34
  @turtle.should eq reference
@@ -39,6 +39,6 @@ EOF
39
39
  end
40
40
  end
41
41
 
42
-
42
+
43
43
 
44
44
  end
@@ -2,7 +2,7 @@ require_relative '../../lib/bio-publisci.rb'
2
2
 
3
3
  require 'tempfile'
4
4
 
5
- describe R2RDF::Reader::RCross do
5
+ describe PubliSci::Reader::RCross do
6
6
 
7
7
  def create_graph(turtle_string)
8
8
  f = Tempfile.new('graph')
@@ -16,7 +16,7 @@ describe R2RDF::Reader::RCross do
16
16
  context "with reduced listeria cross", no_travis: true do
17
17
  before(:all) do
18
18
  @r = Rserve::Connection.new
19
- @generator = R2RDF::Reader::RCross.new
19
+ @generator = PubliSci::Reader::RCross.new
20
20
  @r.eval <<-EOF
21
21
  library(qtl)
22
22
  data(listeria)
@@ -6,7 +6,7 @@ require_relative '../../lib/bio-publisci.rb'
6
6
 
7
7
  require 'tempfile'
8
8
 
9
- describe R2RDF::Reader::RMatrix do
9
+ describe PubliSci::Reader::RMatrix do
10
10
 
11
11
  def create_graph(turtle_string)
12
12
  f = Tempfile.new('graph')
@@ -18,7 +18,7 @@ describe R2RDF::Reader::RMatrix do
18
18
  end
19
19
 
20
20
  before(:each) do
21
- @generator = R2RDF::Reader::RMatrix.new
21
+ @generator = PubliSci::Reader::RMatrix.new
22
22
  @connection = Rserve::Connection.new
23
23
  end
24
24