bio-publisci 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +5 -5
  4. data/bin/bio-publisci +34 -11
  5. data/examples/bio-band_integration.rb +9 -0
  6. data/examples/no_magic.prov +40 -0
  7. data/examples/primer.prov +28 -0
  8. data/examples/prov_dsl.prov +51 -0
  9. data/features/create_generator.feature +5 -9
  10. data/features/integration_steps.rb +8 -8
  11. data/features/metadata.feature +15 -2
  12. data/features/metadata_steps.rb +21 -0
  13. data/features/orm_steps.rb +5 -5
  14. data/features/prov_dsl.feature +14 -0
  15. data/features/prov_dsl_steps.rb +11 -0
  16. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
  17. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
  18. data/lib/bio-publisci/dataset/data_cube.rb +30 -26
  19. data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
  20. data/lib/bio-publisci/metadata/metadata.rb +180 -42
  21. data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
  22. data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
  23. data/lib/bio-publisci/metadata/prov/association.rb +73 -0
  24. data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
  25. data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
  26. data/lib/bio-publisci/metadata/prov/element.rb +52 -0
  27. data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
  28. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  29. data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
  30. data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
  31. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  32. data/lib/bio-publisci/output.rb +27 -0
  33. data/lib/bio-publisci/parser.rb +17 -8
  34. data/lib/bio-publisci/readers/csv.rb +9 -7
  35. data/lib/bio-publisci/readers/dataframe.rb +9 -8
  36. data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
  37. data/lib/bio-publisci/readers/r_matrix.rb +37 -13
  38. data/lib/bio-publisci/spira.rb +82 -0
  39. data/lib/bio-publisci/writers/dataframe.rb +65 -65
  40. data/lib/bio-publisci.rb +9 -4
  41. data/spec/ORM/data_cube_orm_spec.rb +3 -3
  42. data/spec/dataset_for_spec.rb +29 -0
  43. data/spec/generators/r_cross_spec.rb +51 -0
  44. data/spec/generators/r_matrix_spec.rb +14 -5
  45. metadata +42 -8
  46. data/lib/bio-publisci/readers/cross.rb +0 -72
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f03476f5595b28e10a4cb3d950b9c454d621eb4d
4
- data.tar.gz: da473d84d9c1e203de4ea1b2d7fef2579cfa8bf8
3
+ metadata.gz: 3cb588f4ec6cfa77e1e9011b687ca3e148c6ca6e
4
+ data.tar.gz: 6a257692f2317eaea75f0fe306b93e01c1de6a70
5
5
  SHA512:
6
- metadata.gz: 33662112c4df2115f15d59f415329dfa11368e8bd1a47907d62c02b476bbeb4afe0a1c43599d2c215a093d1765b7ffb05320220e21888267811c86beacef8876
7
- data.tar.gz: 683b5874d082ab155227ee92a1e9bfe15a66b0eaf4fe1e590db39a9391346ec31850b0a94d924c9d26b356fb6ed7f82a228c9d16afa1a67d9c6710086e849963
6
+ metadata.gz: 163459c5c65c023475a5cb9853c7300d8b5f2a0c4afdf8a98a9f7f7f61ba9103c15e562f01c08a19f7c2d08853ce31447b0710c29e14d9c6f954705f82c26bba
7
+ data.tar.gz: 6e07fbb66df785ef3d15820d825823fdcd7ee18fe60cd68f8009f9f92c3204fc8945c3e0ac3bf79663b66d1259036da4f6d2749bac2d34f8870c73c25d850cbc
data/Gemfile CHANGED
@@ -13,6 +13,7 @@ group :development do
13
13
  gem "bundler", ">= 1.0.21"
14
14
  gem "bio", ">= 1.4.2"
15
15
  gem "rdoc", "~> 3.12"
16
+ gem "spoon"
16
17
  end
17
18
 
18
19
 
data/Rakefile CHANGED
@@ -17,11 +17,11 @@ Jeweler::Tasks.new do |gem|
17
17
  gem.name = "bio-publisci"
18
18
  gem.homepage = "http://github.com/wstrinz/bioruby-publisci"
19
19
  gem.license = "MIT"
20
- gem.summary = %Q{Publish science data using semantic web ontologies}
21
- gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
20
+ gem.summary = %Q{Publish scientific results to the semantic web}
21
+ gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF, OWL, and related technologies }
22
22
  gem.email = "wstrinz@gmail.com"
23
- gem.authors = ["wstrinz"]
24
- gem.version = "0.0.3"
23
+ gem.authors = ["Will Strinz"]
24
+ gem.version = "0.0.4"
25
25
 
26
26
  # dependencies defined in Gemfile
27
27
  end
@@ -50,7 +50,7 @@ task :default => [] do
50
50
  end
51
51
 
52
52
  task :test => [] do
53
- begin
53
+ begin
54
54
  Rake::Task[:spec].invoke
55
55
  rescue
56
56
  end
data/bin/bio-publisci CHANGED
@@ -4,25 +4,48 @@
4
4
  # Author:: wstrinz
5
5
  # Copyright:: 2013
6
6
 
7
- USAGE = "Describe bio-publisci"
7
+ version = "0.0.4" #File.new(VERSION_FILENAME).read.chomp
8
+
9
+ USAGE = <<-EOF
10
+ bio-publisci #{version} by Will Strinz 2013
11
+
12
+ Usage:
13
+
14
+ bio-publisci file
15
+ (execute PROV dsl file)
16
+ EOF
8
17
 
9
18
  gempath = File.dirname(File.dirname(__FILE__))
10
19
  $: << File.join(gempath,'lib')
20
+ require 'bio-publisci'
11
21
 
12
- VERSION_FILENAME=File.join(gempath,'VERSION')
13
- version = File.new(VERSION_FILENAME).read.chomp
22
+ #VERSION_FILENAME=File.join(gempath,'VERSION')
14
23
 
15
24
  # print banner
16
- print "bio-publisci #{version} by wstrinz 2013\n"
17
25
 
18
26
  if ARGV.size == 0
19
27
  print USAGE
28
+ elsif ARGV.size == 1
29
+ #assume file, run DSL (prov for now)
30
+ if File.exist? ARGV[0]
31
+ puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
32
+ else
33
+ puts "(no file #{ARGV[0]})"
34
+ print USAGE
35
+ end
36
+ else
37
+ if ARGV.size % 2 == 0
38
+ opts=Hash[*ARGV]
39
+ puts "got options #{opts}"
40
+ else
41
+ print USAGE
42
+ end
20
43
  end
21
44
 
22
45
  require 'bio-publisci'
23
46
  require 'optparse'
24
47
 
25
- # Uncomment when using the bio-logger
48
+ # Uncomment when using the bio-logger
26
49
  # require 'bio-logger'
27
50
  # Bio::Log::CLI.logger('stderr')
28
51
  # Bio::Log::CLI.trace('info')
@@ -35,14 +58,14 @@ opts = OptionParser.new do |o|
35
58
  # TODO: your logic here, below an example
36
59
  options[:example_parameter] = 'this is a parameter'
37
60
  end
38
-
61
+
39
62
  o.separator ""
40
63
  o.on("--switch-example", 'TODO: put a description for the SWITCH') do
41
64
  # TODO: your logic here, below an example
42
65
  self[:example_switch] = true
43
66
  end
44
67
 
45
- # Uncomment the following when using the bio-logger
68
+ # Uncomment the following when using the bio-logger
46
69
  # o.separator ""
47
70
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
48
71
  # Bio::Log::CLI.logger(name)
@@ -51,15 +74,15 @@ opts = OptionParser.new do |o|
51
74
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
52
75
  # Bio::Log::CLI.trace(s)
53
76
  # end
54
- #
77
+ #
55
78
  # o.on("-q", "--quiet", "Run quietly") do |q|
56
79
  # Bio::Log::CLI.trace('error')
57
80
  # end
58
- #
81
+ #
59
82
  # o.on("-v", "--verbose", "Run verbosely") do |v|
60
83
  # Bio::Log::CLI.trace('info')
61
84
  # end
62
- #
85
+ #
63
86
  # o.on("--debug", "Show debug messages") do |v|
64
87
  # Bio::Log::CLI.trace('debug')
65
88
  # end
@@ -73,7 +96,7 @@ end
73
96
  begin
74
97
  opts.parse!(ARGV)
75
98
 
76
- # Uncomment the following when using the bio-logger
99
+ # Uncomment the following when using the bio-logger
77
100
  # Bio::Log::CLI.configure('bio-publisci')
78
101
 
79
102
  # TODO: your code here
data/examples/bio-band_integration.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'bio-band'
2
+ require 'bio-publisci'
3
+
4
+ f = open(File.dirname(__FILE__) + '/../resources/weather.numeric.arff')
5
+ clustering = Weka::Clusterer::SimpleKMeans::Base
6
+ clustering.set_options "-N 5"
7
+ clustering.set_data(Core::Parser::parse_ARFF(f.path))
8
+ clustered = clustering.new
9
+ puts clustered
data/examples/no_magic.prov ADDED
@@ -0,0 +1,40 @@
1
+ agent :publisci, subject: 'http://gsocsemantic.wordpress.com/publisci', type: "software"
2
+ agent :R, subject: "http://r-project.org"
3
+ agent :sciruby, subject: "http://sciruby.com", type: "organization"
4
+
5
+ plan :R_steps, steps: "spec/resource/example.Rhistory"
6
+
7
+ agent :Will do
8
+ subject "http://gsocsemantic.wordpress.com/me"
9
+ type "person"
10
+ name "Will Strinz"
11
+ on_behalf_of "http://sciruby.com"
12
+ end
13
+
14
+ entity :triplified_example, subject: "http://example.org/dataset/ex", generated_by: :triplify
15
+
16
+ entity :original do
17
+ generated_by :use_R
18
+ subject "http://example.org/R/ex"
19
+ source "./example.RData"
20
+
21
+ set "http://purl.org/dc/terms/title", "original data object"
22
+ end
23
+
24
+ activity :use_R do
25
+ generated :original
26
+
27
+ associated_with {
28
+ agent :R
29
+ }
30
+
31
+ associated_with :Will
32
+ end
33
+
34
+ activity :triplify do
35
+ generated "http://example.org/dataset/ex"
36
+ associated_with :publisci
37
+ used :original
38
+ end
39
+
40
+ generate_n3
data/examples/primer.prov ADDED
@@ -0,0 +1,28 @@
1
+ # http://www.w3.org/TR/prov-primer/images/agents.png
2
+
3
+ organization :chartgen
4
+
5
+ agent :derek do
6
+ name "Derek"
7
+ on_behalf_of :chartgen
8
+ end
9
+
10
+ entity :chart1, attributed_to: :derek
11
+ entity :composition1
12
+ entity :regionList1
13
+ entity :dataSet1
14
+
15
+ activity :illustrate do
16
+ generated :chart1
17
+ associated_with :derek
18
+ used :composition1
19
+ end
20
+
21
+ activity :compose1 do
22
+ generated :composition1
23
+ associated_with :derek
24
+ used :regionList1
25
+ used :dataSet1
26
+ end
27
+
28
+ generate_n3(true)
data/examples/prov_dsl.prov ADDED
@@ -0,0 +1,51 @@
1
+ agent :publisci, type: "software"
2
+ agent :R, subject: "http://r-project.org"
3
+
4
+ plan :R_steps, steps: "spec/resource/example.Rhistory"
5
+
6
+ organization :sciruby, subject: "http://sciruby.com"
7
+
8
+ foaf = vocabulary "http://xmlns.com/foaf/0.1/"
9
+
10
+ agent :Will do
11
+ type "person"
12
+ name "Will Strinz"
13
+ on_behalf_of :sciruby
14
+
15
+ has foaf.mailbox, "wstrinz@gmail.com"
16
+ has foaf.homepage, "http://gsocsemantic.wordpress.com/"
17
+ end
18
+
19
+ data :triplified_example do
20
+ attributed_to :Will
21
+ derived_from do
22
+ entity :original
23
+ activity :triplify
24
+ end
25
+ end
26
+
27
+ activity :triplify do
28
+ generated :triplified_example
29
+ associated_with :publisci
30
+ used :original
31
+ end
32
+
33
+ data :original do
34
+ derived_from :field_work
35
+ attributed_to :R
36
+ end
37
+
38
+ data :field_work
39
+
40
+ activity :use_R do
41
+ generated :original
42
+
43
+ associated_with {
44
+ agent :R
45
+ plan :R_steps
46
+ }
47
+
48
+ associated_with :Will
49
+ end
50
+
51
+ generate_n3 true
data/features/create_generator.feature CHANGED
@@ -1,7 +1,7 @@
1
1
  Feature: create generators
2
2
 
3
3
  In order to check that objects conform to a common interface
4
- I want to be able to call a generate method on various classes
4
+ I want to be able to call a generate method on various classes
5
5
 
6
6
  Scenario: create a Dataframe generator
7
7
  Given a Dataframe generator
@@ -9,17 +9,13 @@ Feature: create generators
9
9
 
10
10
  Scenario: create a CSV generator
11
11
  Given a CSV generator
12
- Then I should be able to call its generate_n3 method
12
+ Then I should be able to call its generate_n3 method
13
13
 
14
14
  Scenario: create a RMatrix generator
15
15
  Given a RMatrix generator
16
16
  Then I should be able to call its generate_n3 method
17
17
 
18
- Scenario: create a Cross generator
19
- Given a Cross generator
20
- Then I should be able to call its generate_n3 method
21
-
22
- Scenario: create a BigCross generator
23
- Given a BigCross generator
24
- Then I should be able to call its generate_n3 method
18
+ Scenario: create a RCross generator
19
+ Given a RCross generator
20
+ Then I should be able to call its generate_n3 method
25
21
 
data/features/integration_steps.rb CHANGED
@@ -1,10 +1,10 @@
1
-
2
1
  Then(/^I should be able to cluster the result and print statistics$/) do
3
- f=Tempfile.open('arff'); f.write @result; f.close
4
- clustering = Weka::Clusterer::SimpleKMeans::Base
5
- clustering.set_options "-N 5"
6
- clustering.set_data(Core::Parser::parse_ARFF(f.path))
7
- f.unlink
8
- clustered = clustering.new
9
- puts clustered
2
+ pending
3
+ # f=Tempfile.open('arff'); f.write @result; f.close
4
+ # clustering = Weka::Clusterer::SimpleKMeans::Base
5
+ # clustering.set_options "-N 5"
6
+ # clustering.set_data(Core::Parser::parse_ARFF(f.path))
7
+ # f.unlink
8
+ # clustered = clustering.new
9
+ # puts clustered
10
10
  end
data/features/metadata.feature CHANGED
@@ -15,10 +15,23 @@ Feature: Receive metadata as user input or extract from data sources
15
15
 
16
16
  Scenario: Generate process information
17
17
  Given a class which includes the Metadata module
18
- When I call its provenance method with the hash {var: "example", software: {name: "R", process: 'spec/resource/example.Rhistory'}}
18
+ And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org', process: 'spec/resource/example.Rhistory'}
19
+ And the rdf dataset {resource:'http://example.org/data'}
20
+ When I call its provenance method with the source object and the rdf object
19
21
  Then I should receive a metadata string
20
22
 
21
23
  Scenario: Generate organizational provenance information
22
24
  Given a class which includes the Metadata module
23
- When I call its provenance method with the hash {var: "example", creator: "http://gsocsemantic.wordpress.com/me", organization: "http://sciruby.com/"}
25
+ And the source object {resource: 'http://example.org/software/R/var/ex', author: 'http://example.org/people/jrs', author_name: "J Random Scientist", organization: 'http://example.org/org/science', organization_name: "The League of Science" }
26
+ And the rdf dataset {resource:'http://example.org/data', author: 'http://gsocsemantic.wordpress.com/me', author_name: "Will Strinz", organization: 'http://sciruby.com/'}
27
+ When I call its provenance method with the source object and the rdf object
28
+ Then I should receive a metadata string
29
+
30
+ Scenario: Generate a provenance chain
31
+ Given a class which includes the Metadata module
32
+ And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org'}
33
+ And the rdf dataset {resource:'http://example.org/data'}
34
+ And the chain object {resource: 'http://example.org/lab_results/1', author: 'http://example.org/people/brl', author_name: "B Random Labtech"}
35
+ And the chain object {resource: 'http://example.org/field_work/1', author: 'http://example.org/people/arn', organization: 'http://example.org/org/science'}
36
+ When I call its provenance method with the source object, the rdf object, and the chain
24
37
  Then I should receive a metadata string
data/features/metadata_steps.rb CHANGED
@@ -5,6 +5,27 @@ Given(/^a class which includes the Metadata module$/) do
5
5
  @klass = Meta
6
6
  end
7
7
 
8
+ Given(/^the source object (\{.+\})$/) do |fields|
9
+ @original = eval(fields)
10
+ end
11
+
12
+ Given(/^the rdf dataset (\{.+\})$/) do |fields|
13
+ @rdf = eval(fields)
14
+ end
15
+
16
+ Given(/^the chain object (\{.+\})$/) do |fields|
17
+ (@chain ||= []) << eval(fields)
18
+ end
19
+
20
+ When(/^I call its provenance method with the source object, the rdf object, and the chain$/) do
21
+ @response = @klass.new.provenance(@original, @rdf, @chain)
22
+ end
23
+
24
+
25
+ When(/^I call its provenance method with the source object and the rdf object$/) do
26
+ @response = @klass.new.provenance(@original, @rdf, nil)
27
+ end
28
+
8
29
  When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
9
30
  fields = eval(fields)
10
31
  @response = @klass.new.basic(fields)
data/features/orm_steps.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative '../lib/bio-publisci.rb'
2
2
 
3
3
  Given /^an ORM::DataCube entitled "(.*?)"$/ do |name|
4
- @cube = R2RDF::Dataset::ORM::DataCube.new(name: name)
4
+ @cube = R2RDF::ORM::DataCube.new(name: name)
5
5
  end
6
6
 
7
7
  Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name, opts|
@@ -12,13 +12,13 @@ Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name
12
12
 
13
13
  v = hash["value"]
14
14
  v = v[1..-1].to_sym if k[0] == ":"
15
-
15
+
16
16
  v = true if v =="true"
17
17
  v = false if v =="false"
18
18
 
19
19
  options_hash[k] = v
20
20
  }
21
- @cube = R2RDF::Dataset::ORM::DataCube.new(options_hash)
21
+ @cube = R2RDF::ORM::DataCube.new(options_hash)
22
22
  end
23
23
 
24
24
  Given(/^a turtle string from file (.*)$/) do |file|
@@ -30,7 +30,7 @@ Given(/^the URI string "(.*?)"$/) do |uri|
30
30
  end
31
31
 
32
32
  When(/^I call the ORM::DataCube class method load on it$/) do
33
- @cube = R2RDF::Dataset::ORM::DataCube.load(@string)
33
+ @cube = R2RDF::ORM::DataCube.load(@string)
34
34
  end
35
35
 
36
36
  When /^I add a "(.*?)" dimension$/ do |dim|
@@ -70,5 +70,5 @@ Then /^the to_n3 method should return a string with a "(.*?)"$/ do |search|
70
70
  end
71
71
 
72
72
  Then(/^I should receive an ORM::DataCube object$/) do
73
- @cube.is_a?(R2RDF::Dataset::ORM::DataCube).should == true
73
+ @cube.is_a?(R2RDF::ORM::DataCube).should == true
74
74
  end
data/features/prov_dsl.feature ADDED
@@ -0,0 +1,14 @@
1
+ Feature: Receive metadata as user input or extract from data sources
2
+
3
+ To generate clean provenance strings through a friendly interface
4
+ I want to use a DSL for the PROV ontology
5
+
6
+ Scenario: Generate without any magic (more open-world)
7
+ Given the prov DSL string from file examples/no_magic.prov
8
+ When I call Prov.run on it
9
+ Then I should receive a provenance string
10
+
11
+ Scenario: Generate from file
12
+ Given the prov DSL string from file examples/prov_dsl.prov
13
+ When I call Prov.run on it
14
+ Then I should receive a provenance string
data/features/prov_dsl_steps.rb ADDED
@@ -0,0 +1,11 @@
1
+ Given(/^the prov DSL string from file (.+)$/) do |file|
2
+ @dsl_string = file
3
+ end
4
+
5
+ When(/^I call Prov\.run on it$/) do
6
+ @turtle_string = PubliSci::Prov.run(@dsl_string)
7
+ end
8
+
9
+ Then(/^I should receive a provenance string$/) do
10
+ puts @turtle_string
11
+ end