bio-publisci 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +5 -5
  4. data/bin/bio-publisci +34 -11
  5. data/examples/bio-band_integration.rb +9 -0
  6. data/examples/no_magic.prov +40 -0
  7. data/examples/primer.prov +28 -0
  8. data/examples/prov_dsl.prov +51 -0
  9. data/features/create_generator.feature +5 -9
  10. data/features/integration_steps.rb +8 -8
  11. data/features/metadata.feature +15 -2
  12. data/features/metadata_steps.rb +21 -0
  13. data/features/orm_steps.rb +5 -5
  14. data/features/prov_dsl.feature +14 -0
  15. data/features/prov_dsl_steps.rb +11 -0
  16. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
  17. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
  18. data/lib/bio-publisci/dataset/data_cube.rb +30 -26
  19. data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
  20. data/lib/bio-publisci/metadata/metadata.rb +180 -42
  21. data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
  22. data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
  23. data/lib/bio-publisci/metadata/prov/association.rb +73 -0
  24. data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
  25. data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
  26. data/lib/bio-publisci/metadata/prov/element.rb +52 -0
  27. data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
  28. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  29. data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
  30. data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
  31. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  32. data/lib/bio-publisci/output.rb +27 -0
  33. data/lib/bio-publisci/parser.rb +17 -8
  34. data/lib/bio-publisci/readers/csv.rb +9 -7
  35. data/lib/bio-publisci/readers/dataframe.rb +9 -8
  36. data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
  37. data/lib/bio-publisci/readers/r_matrix.rb +37 -13
  38. data/lib/bio-publisci/spira.rb +82 -0
  39. data/lib/bio-publisci/writers/dataframe.rb +65 -65
  40. data/lib/bio-publisci.rb +9 -4
  41. data/spec/ORM/data_cube_orm_spec.rb +3 -3
  42. data/spec/dataset_for_spec.rb +29 -0
  43. data/spec/generators/r_cross_spec.rb +51 -0
  44. data/spec/generators/r_matrix_spec.rb +14 -5
  45. metadata +42 -8
  46. data/lib/bio-publisci/readers/cross.rb +0 -72
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f03476f5595b28e10a4cb3d950b9c454d621eb4d
4
- data.tar.gz: da473d84d9c1e203de4ea1b2d7fef2579cfa8bf8
3
+ metadata.gz: 3cb588f4ec6cfa77e1e9011b687ca3e148c6ca6e
4
+ data.tar.gz: 6a257692f2317eaea75f0fe306b93e01c1de6a70
5
5
  SHA512:
6
- metadata.gz: 33662112c4df2115f15d59f415329dfa11368e8bd1a47907d62c02b476bbeb4afe0a1c43599d2c215a093d1765b7ffb05320220e21888267811c86beacef8876
7
- data.tar.gz: 683b5874d082ab155227ee92a1e9bfe15a66b0eaf4fe1e590db39a9391346ec31850b0a94d924c9d26b356fb6ed7f82a228c9d16afa1a67d9c6710086e849963
6
+ metadata.gz: 163459c5c65c023475a5cb9853c7300d8b5f2a0c4afdf8a98a9f7f7f61ba9103c15e562f01c08a19f7c2d08853ce31447b0710c29e14d9c6f954705f82c26bba
7
+ data.tar.gz: 6e07fbb66df785ef3d15820d825823fdcd7ee18fe60cd68f8009f9f92c3204fc8945c3e0ac3bf79663b66d1259036da4f6d2749bac2d34f8870c73c25d850cbc
data/Gemfile CHANGED
@@ -13,6 +13,7 @@ group :development do
13
13
  gem "bundler", ">= 1.0.21"
14
14
  gem "bio", ">= 1.4.2"
15
15
  gem "rdoc", "~> 3.12"
16
+ gem "spoon"
16
17
  end
17
18
 
18
19
 
data/Rakefile CHANGED
@@ -17,11 +17,11 @@ Jeweler::Tasks.new do |gem|
17
17
  gem.name = "bio-publisci"
18
18
  gem.homepage = "http://github.com/wstrinz/bioruby-publisci"
19
19
  gem.license = "MIT"
20
- gem.summary = %Q{Publish science data using semantic web ontologies}
21
- gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
20
+ gem.summary = %Q{Publish scientific results to the semantic web}
21
+ gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF, OWL, and related technologies }
22
22
  gem.email = "wstrinz@gmail.com"
23
- gem.authors = ["wstrinz"]
24
- gem.version = "0.0.3"
23
+ gem.authors = ["Will Strinz"]
24
+ gem.version = "0.0.4"
25
25
 
26
26
  # dependencies defined in Gemfile
27
27
  end
@@ -50,7 +50,7 @@ task :default => [] do
50
50
  end
51
51
 
52
52
  task :test => [] do
53
- begin
53
+ begin
54
54
  Rake::Task[:spec].invoke
55
55
  rescue
56
56
  end
data/bin/bio-publisci CHANGED
@@ -4,25 +4,48 @@
4
4
  # Author:: wstrinz
5
5
  # Copyright:: 2013
6
6
 
7
- USAGE = "Describe bio-publisci"
7
+ version = "0.0.4" #File.new(VERSION_FILENAME).read.chomp
8
+
9
+ USAGE = <<-EOF
10
+ bio-publisci #{version} by Will Strinz 2013
11
+
12
+ Usage:
13
+
14
+ bio-publisci file
15
+ (execute PROV dsl file)
16
+ EOF
8
17
 
9
18
  gempath = File.dirname(File.dirname(__FILE__))
10
19
  $: << File.join(gempath,'lib')
20
+ require 'bio-publisci'
11
21
 
12
- VERSION_FILENAME=File.join(gempath,'VERSION')
13
- version = File.new(VERSION_FILENAME).read.chomp
22
+ #VERSION_FILENAME=File.join(gempath,'VERSION')
14
23
 
15
24
  # print banner
16
- print "bio-publisci #{version} by wstrinz 2013\n"
17
25
 
18
26
  if ARGV.size == 0
19
27
  print USAGE
28
+ elsif ARGV.size == 1
29
+ #assume file, run DSL (prov for now)
30
+ if File.exist? ARGV[0]
31
+ puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
32
+ else
33
+ puts "(no file #{ARGV[0]})"
34
+ print USAGE
35
+ end
36
+ else
37
+ if ARGV.size % 2 == 0
38
+ opts=Hash[*ARGV]
39
+ puts "got options #{opts}"
40
+ else
41
+ print USAGE
42
+ end
20
43
  end
21
44
 
22
45
  require 'bio-publisci'
23
46
  require 'optparse'
24
47
 
25
- # Uncomment when using the bio-logger
48
+ # Uncomment when using the bio-logger
26
49
  # require 'bio-logger'
27
50
  # Bio::Log::CLI.logger('stderr')
28
51
  # Bio::Log::CLI.trace('info')
@@ -35,14 +58,14 @@ opts = OptionParser.new do |o|
35
58
  # TODO: your logic here, below an example
36
59
  options[:example_parameter] = 'this is a parameter'
37
60
  end
38
-
61
+
39
62
  o.separator ""
40
63
  o.on("--switch-example", 'TODO: put a description for the SWITCH') do
41
64
  # TODO: your logic here, below an example
42
65
  self[:example_switch] = true
43
66
  end
44
67
 
45
- # Uncomment the following when using the bio-logger
68
+ # Uncomment the following when using the bio-logger
46
69
  # o.separator ""
47
70
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
48
71
  # Bio::Log::CLI.logger(name)
@@ -51,15 +74,15 @@ opts = OptionParser.new do |o|
51
74
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
52
75
  # Bio::Log::CLI.trace(s)
53
76
  # end
54
- #
77
+ #
55
78
  # o.on("-q", "--quiet", "Run quietly") do |q|
56
79
  # Bio::Log::CLI.trace('error')
57
80
  # end
58
- #
81
+ #
59
82
  # o.on("-v", "--verbose", "Run verbosely") do |v|
60
83
  # Bio::Log::CLI.trace('info')
61
84
  # end
62
- #
85
+ #
63
86
  # o.on("--debug", "Show debug messages") do |v|
64
87
  # Bio::Log::CLI.trace('debug')
65
88
  # end
@@ -73,7 +96,7 @@ end
73
96
  begin
74
97
  opts.parse!(ARGV)
75
98
 
76
- # Uncomment the following when using the bio-logger
99
+ # Uncomment the following when using the bio-logger
77
100
  # Bio::Log::CLI.configure('bio-publisci')
78
101
 
79
102
  # TODO: your code here
data/examples/bio-band_integration.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'bio-band'
2
+ require 'bio-publisci'
3
+
4
+ f = open(File.dirname(__FILE__) + '/../resources/weather.numeric.arff')
5
+ clustering = Weka::Clusterer::SimpleKMeans::Base
6
+ clustering.set_options "-N 5"
7
+ clustering.set_data(Core::Parser::parse_ARFF(f.path))
8
+ clustered = clustering.new
9
+ puts clustered
data/examples/no_magic.prov ADDED
@@ -0,0 +1,40 @@
1
+ agent :publisci, subject: 'http://gsocsemantic.wordpress.com/publisci', type: "software"
2
+ agent :R, subject: "http://r-project.org"
3
+ agent :sciruby, subject: "http://sciruby.com", type: "organization"
4
+
5
+ plan :R_steps, steps: "spec/resource/example.Rhistory"
6
+
7
+ agent :Will do
8
+ subject "http://gsocsemantic.wordpress.com/me"
9
+ type "person"
10
+ name "Will Strinz"
11
+ on_behalf_of "http://sciruby.com"
12
+ end
13
+
14
+ entity :triplified_example, subject: "http://example.org/dataset/ex", generated_by: :triplify
15
+
16
+ entity :original do
17
+ generated_by :use_R
18
+ subject "http://example.org/R/ex"
19
+ source "./example.RData"
20
+
21
+ set "http://purl.org/dc/terms/title", "original data object"
22
+ end
23
+
24
+ activity :use_R do
25
+ generated :original
26
+
27
+ associated_with {
28
+ agent :R
29
+ }
30
+
31
+ associated_with :Will
32
+ end
33
+
34
+ activity :triplify do
35
+ generated "http://example.org/dataset/ex"
36
+ associated_with :publisci
37
+ used :original
38
+ end
39
+
40
+ generate_n3
data/examples/primer.prov ADDED
@@ -0,0 +1,28 @@
1
+ # http://www.w3.org/TR/prov-primer/images/agents.png
2
+
3
+ organization :chartgen
4
+
5
+ agent :derek do
6
+ name "Derek"
7
+ on_behalf_of :chartgen
8
+ end
9
+
10
+ entity :chart1, attributed_to: :derek
11
+ entity :composition1
12
+ entity :regionList1
13
+ entity :dataSet1
14
+
15
+ activity :illustrate do
16
+ generated :chart1
17
+ associated_with :derek
18
+ used :composition1
19
+ end
20
+
21
+ activity :compose1 do
22
+ generated :composition1
23
+ associated_with :derek
24
+ used :regionList1
25
+ used :dataSet1
26
+ end
27
+
28
+ generate_n3(true)
data/examples/prov_dsl.prov ADDED
@@ -0,0 +1,51 @@
1
+ agent :publisci, type: "software"
2
+ agent :R, subject: "http://r-project.org"
3
+
4
+ plan :R_steps, steps: "spec/resource/example.Rhistory"
5
+
6
+ organization :sciruby, subject: "http://sciruby.com"
7
+
8
+ foaf = vocabulary "http://xmlns.com/foaf/0.1/"
9
+
10
+ agent :Will do
11
+ type "person"
12
+ name "Will Strinz"
13
+ on_behalf_of :sciruby
14
+
15
+ has foaf.mailbox, "wstrinz@gmail.com"
16
+ has foaf.homepage, "http://gsocsemantic.wordpress.com/"
17
+ end
18
+
19
+ data :triplified_example do
20
+ attributed_to :Will
21
+ derived_from do
22
+ entity :original
23
+ activity :triplify
24
+ end
25
+ end
26
+
27
+ activity :triplify do
28
+ generated :triplified_example
29
+ associated_with :publisci
30
+ used :original
31
+ end
32
+
33
+ data :original do
34
+ derived_from :field_work
35
+ attributed_to :R
36
+ end
37
+
38
+ data :field_work
39
+
40
+ activity :use_R do
41
+ generated :original
42
+
43
+ associated_with {
44
+ agent :R
45
+ plan :R_steps
46
+ }
47
+
48
+ associated_with :Will
49
+ end
50
+
51
+ generate_n3 true
data/features/create_generator.feature CHANGED
@@ -1,7 +1,7 @@
1
1
  Feature: create generators
2
2
 
3
3
  In order to check that objects conform to a common interface
4
- I want to be able to call a generate method on various classes
4
+ I want to be able to call a generate method on various classes
5
5
 
6
6
  Scenario: create a Dataframe generator
7
7
  Given a Dataframe generator
@@ -9,17 +9,13 @@ Feature: create generators
9
9
 
10
10
  Scenario: create a CSV generator
11
11
  Given a CSV generator
12
- Then I should be able to call its generate_n3 method
12
+ Then I should be able to call its generate_n3 method
13
13
 
14
14
  Scenario: create a RMatrix generator
15
15
  Given a RMatrix generator
16
16
  Then I should be able to call its generate_n3 method
17
17
 
18
- Scenario: create a Cross generator
19
- Given a Cross generator
20
- Then I should be able to call its generate_n3 method
21
-
22
- Scenario: create a BigCross generator
23
- Given a BigCross generator
24
- Then I should be able to call its generate_n3 method
18
+ Scenario: create a RCross generator
19
+ Given a RCross generator
20
+ Then I should be able to call its generate_n3 method
25
21
 
data/features/integration_steps.rb CHANGED
@@ -1,10 +1,10 @@
1
-
2
1
  Then(/^I should be able to cluster the result and print statistics$/) do
3
- f=Tempfile.open('arff'); f.write @result; f.close
4
- clustering = Weka::Clusterer::SimpleKMeans::Base
5
- clustering.set_options "-N 5"
6
- clustering.set_data(Core::Parser::parse_ARFF(f.path))
7
- f.unlink
8
- clustered = clustering.new
9
- puts clustered
2
+ pending
3
+ # f=Tempfile.open('arff'); f.write @result; f.close
4
+ # clustering = Weka::Clusterer::SimpleKMeans::Base
5
+ # clustering.set_options "-N 5"
6
+ # clustering.set_data(Core::Parser::parse_ARFF(f.path))
7
+ # f.unlink
8
+ # clustered = clustering.new
9
+ # puts clustered
10
10
  end
data/features/metadata.feature CHANGED
@@ -15,10 +15,23 @@ Feature: Receive metadata as user input or extract from data sources
15
15
 
16
16
  Scenario: Generate process information
17
17
  Given a class which includes the Metadata module
18
- When I call its provenance method with the hash {var: "example", software: {name: "R", process: 'spec/resource/example.Rhistory'}}
18
+ And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org', process: 'spec/resource/example.Rhistory'}
19
+ And the rdf dataset {resource:'http://example.org/data'}
20
+ When I call its provenance method with the source object and the rdf object
19
21
  Then I should receive a metadata string
20
22
 
21
23
  Scenario: Generate organizational provenance information
22
24
  Given a class which includes the Metadata module
23
- When I call its provenance method with the hash {var: "example", creator: "http://gsocsemantic.wordpress.com/me", organization: "http://sciruby.com/"}
25
+ And the source object {resource: 'http://example.org/software/R/var/ex', author: 'http://example.org/people/jrs', author_name: "J Random Scientist", organization: 'http://example.org/org/science', organization_name: "The League of Science" }
26
+ And the rdf dataset {resource:'http://example.org/data', author: 'http://gsocsemantic.wordpress.com/me', author_name: "Will Strinz", organization: 'http://sciruby.com/'}
27
+ When I call its provenance method with the source object and the rdf object
28
+ Then I should receive a metadata string
29
+
30
+ Scenario: Generate a provenance chain
31
+ Given a class which includes the Metadata module
32
+ And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org'}
33
+ And the rdf dataset {resource:'http://example.org/data'}
34
+ And the chain object {resource: 'http://example.org/lab_results/1', author: 'http://example.org/people/brl', author_name: "B Random Labtech"}
35
+ And the chain object {resource: 'http://example.org/field_work/1', author: 'http://example.org/people/arn', organization: 'http://example.org/org/science'}
36
+ When I call its provenance method with the source object, the rdf object, and the chain
24
37
  Then I should receive a metadata string
data/features/metadata_steps.rb CHANGED
@@ -5,6 +5,27 @@ Given(/^a class which includes the Metadata module$/) do
5
5
  @klass = Meta
6
6
  end
7
7
 
8
+ Given(/^the source object (\{.+\})$/) do |fields|
9
+ @original = eval(fields)
10
+ end
11
+
12
+ Given(/^the rdf dataset (\{.+\})$/) do |fields|
13
+ @rdf = eval(fields)
14
+ end
15
+
16
+ Given(/^the chain object (\{.+\})$/) do |fields|
17
+ (@chain ||= []) << eval(fields)
18
+ end
19
+
20
+ When(/^I call its provenance method with the source object, the rdf object, and the chain$/) do
21
+ @response = @klass.new.provenance(@original, @rdf, @chain)
22
+ end
23
+
24
+
25
+ When(/^I call its provenance method with the source object and the rdf object$/) do
26
+ @response = @klass.new.provenance(@original, @rdf, nil)
27
+ end
28
+
8
29
  When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
9
30
  fields = eval(fields)
10
31
  @response = @klass.new.basic(fields)
data/features/orm_steps.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
3
  Given /^an ORM::DataCube entitled "(.*?)"$/ do |name|
4
- @cube = R2RDF::Dataset::ORM::DataCube.new(name: name)
4
+ @cube = R2RDF::ORM::DataCube.new(name: name)
5
5
  end
6
6
 
7
7
  Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name, opts|
@@ -12,13 +12,13 @@ Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name
12
12
 
13
13
  v = hash["value"]
14
14
  v = v[1..-1].to_sym if k[0] == ":"
15
-
15
+
16
16
  v = true if v =="true"
17
17
  v = false if v =="false"
18
18
 
19
19
  options_hash[k] = v
20
20
  }
21
- @cube = R2RDF::Dataset::ORM::DataCube.new(options_hash)
21
+ @cube = R2RDF::ORM::DataCube.new(options_hash)
22
22
  end
23
23
 
24
24
  Given(/^a turtle string from file (.*)$/) do |file|
@@ -30,7 +30,7 @@ Given(/^the URI string "(.*?)"$/) do |uri|
30
30
  end
31
31
 
32
32
  When(/^I call the ORM::DataCube class method load on it$/) do
33
- @cube = R2RDF::Dataset::ORM::DataCube.load(@string)
33
+ @cube = R2RDF::ORM::DataCube.load(@string)
34
34
  end
35
35
 
36
36
  When /^I add a "(.*?)" dimension$/ do |dim|
@@ -70,5 +70,5 @@ Then /^the to_n3 method should return a string with a "(.*?)"$/ do |search|
70
70
  end
71
71
 
72
72
  Then(/^I should receive an ORM::DataCube object$/) do
73
- @cube.is_a?(R2RDF::Dataset::ORM::DataCube).should == true
73
+ @cube.is_a?(R2RDF::ORM::DataCube).should == true
74
74
  end
@@ -0,0 +1,14 @@
1
+ Feature: Receive metadata as user input or extract from data sources
2
+
3
+ To generate clean provenance strings through a friendly interface
4
+ I want to use a DSL for the PROV ontology
5
+
6
+ Scenario: Generate without any magic (more open-world)
7
+ Given the prov DSL string from file examples/no_magic.prov
8
+ When I call Prov.run on it
9
+ Then I should receive a provenance string
10
+
11
+ Scenario: Generate from file
12
+ Given the prov DSL string from file examples/prov_dsl.prov
13
+ When I call Prov.run on it
14
+ Then I should receive a provenance string
@@ -0,0 +1,11 @@
1
+ Given(/^the prov DSL string from file (.+)$/) do |file|
2
+ @dsl_string = file
3
+ end
4
+
5
+ When(/^I call Prov\.run on it$/) do
6
+ @turtle_string = PubliSci::Prov.run(@dsl_string)
7
+ end
8
+
9
+ Then(/^I should receive a provenance string$/) do
10
+ puts @turtle_string
11
+ end