bio-publisci 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +5 -5
- data/bin/bio-publisci +34 -11
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +40 -0
- data/examples/primer.prov +28 -0
- data/examples/prov_dsl.prov +51 -0
- data/features/create_generator.feature +5 -9
- data/features/integration_steps.rb +8 -8
- data/features/metadata.feature +15 -2
- data/features/metadata_steps.rb +21 -0
- data/features/orm_steps.rb +5 -5
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
- data/lib/bio-publisci/dataset/data_cube.rb +30 -26
- data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
- data/lib/bio-publisci/metadata/metadata.rb +180 -42
- data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
- data/lib/bio-publisci/metadata/prov/association.rb +73 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
- data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
- data/lib/bio-publisci/metadata/prov/element.rb +52 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +17 -8
- data/lib/bio-publisci/readers/csv.rb +9 -7
- data/lib/bio-publisci/readers/dataframe.rb +9 -8
- data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
- data/lib/bio-publisci/readers/r_matrix.rb +37 -13
- data/lib/bio-publisci/spira.rb +82 -0
- data/lib/bio-publisci/writers/dataframe.rb +65 -65
- data/lib/bio-publisci.rb +9 -4
- data/spec/ORM/data_cube_orm_spec.rb +3 -3
- data/spec/dataset_for_spec.rb +29 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +14 -5
- metadata +42 -8
- data/lib/bio-publisci/readers/cross.rb +0 -72
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3cb588f4ec6cfa77e1e9011b687ca3e148c6ca6e
|
4
|
+
data.tar.gz: 6a257692f2317eaea75f0fe306b93e01c1de6a70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 163459c5c65c023475a5cb9853c7300d8b5f2a0c4afdf8a98a9f7f7f61ba9103c15e562f01c08a19f7c2d08853ce31447b0710c29e14d9c6f954705f82c26bba
|
7
|
+
data.tar.gz: 6e07fbb66df785ef3d15820d825823fdcd7ee18fe60cd68f8009f9f92c3204fc8945c3e0ac3bf79663b66d1259036da4f6d2749bac2d34f8870c73c25d850cbc
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -17,11 +17,11 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "bio-publisci"
|
18
18
|
gem.homepage = "http://github.com/wstrinz/bioruby-publisci"
|
19
19
|
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{Publish
|
21
|
-
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
|
20
|
+
gem.summary = %Q{Publish scientific results to the semantic web}
|
21
|
+
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF, OWL, and related technologies }
|
22
22
|
gem.email = "wstrinz@gmail.com"
|
23
|
-
gem.authors = ["
|
24
|
-
gem.version = "0.0.3"
|
23
|
+
gem.authors = ["Will Strinz"]
|
24
|
+
gem.version = "0.0.4"
|
25
25
|
|
26
26
|
# dependencies defined in Gemfile
|
27
27
|
end
|
@@ -50,7 +50,7 @@ task :default => [] do
|
|
50
50
|
end
|
51
51
|
|
52
52
|
task :test => [] do
|
53
|
-
begin
|
53
|
+
begin
|
54
54
|
Rake::Task[:spec].invoke
|
55
55
|
rescue
|
56
56
|
end
|
data/bin/bio-publisci
CHANGED
@@ -4,25 +4,48 @@
|
|
4
4
|
# Author:: wstrinz
|
5
5
|
# Copyright:: 2013
|
6
6
|
|
7
|
-
|
7
|
+
version = "0.0.4" #File.new(VERSION_FILENAME).read.chomp
|
8
|
+
|
9
|
+
USAGE = <<-EOF
|
10
|
+
bio-publisci #{version} by Will Strinz 2013
|
11
|
+
|
12
|
+
Usage:
|
13
|
+
|
14
|
+
bio-publisci file
|
15
|
+
(execute PROV dsl file)
|
16
|
+
EOF
|
8
17
|
|
9
18
|
gempath = File.dirname(File.dirname(__FILE__))
|
10
19
|
$: << File.join(gempath,'lib')
|
20
|
+
require 'bio-publisci'
|
11
21
|
|
12
|
-
VERSION_FILENAME=File.join(gempath,'VERSION')
|
13
|
-
version = File.new(VERSION_FILENAME).read.chomp
|
22
|
+
#VERSION_FILENAME=File.join(gempath,'VERSION')
|
14
23
|
|
15
24
|
# print banner
|
16
|
-
print "bio-publisci #{version} by wstrinz 2013\n"
|
17
25
|
|
18
26
|
if ARGV.size == 0
|
19
27
|
print USAGE
|
28
|
+
elsif ARGV.size == 1
|
29
|
+
#assume file, run DSL (prov for now)
|
30
|
+
if File.exist? ARGV[0]
|
31
|
+
puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
|
32
|
+
else
|
33
|
+
puts "(no file #{ARGV[0]})"
|
34
|
+
print USAGE
|
35
|
+
end
|
36
|
+
else
|
37
|
+
if ARGV.size % 2 == 0
|
38
|
+
opts=Hash[*ARGV]
|
39
|
+
puts "got options #{opts}"
|
40
|
+
else
|
41
|
+
print USAGE
|
42
|
+
end
|
20
43
|
end
|
21
44
|
|
22
45
|
require 'bio-publisci'
|
23
46
|
require 'optparse'
|
24
47
|
|
25
|
-
# Uncomment when using the bio-logger
|
48
|
+
# Uncomment when using the bio-logger
|
26
49
|
# require 'bio-logger'
|
27
50
|
# Bio::Log::CLI.logger('stderr')
|
28
51
|
# Bio::Log::CLI.trace('info')
|
@@ -35,14 +58,14 @@ opts = OptionParser.new do |o|
|
|
35
58
|
# TODO: your logic here, below an example
|
36
59
|
options[:example_parameter] = 'this is a parameter'
|
37
60
|
end
|
38
|
-
|
61
|
+
|
39
62
|
o.separator ""
|
40
63
|
o.on("--switch-example", 'TODO: put a description for the SWITCH') do
|
41
64
|
# TODO: your logic here, below an example
|
42
65
|
self[:example_switch] = true
|
43
66
|
end
|
44
67
|
|
45
|
-
# Uncomment the following when using the bio-logger
|
68
|
+
# Uncomment the following when using the bio-logger
|
46
69
|
# o.separator ""
|
47
70
|
# o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
48
71
|
# Bio::Log::CLI.logger(name)
|
@@ -51,15 +74,15 @@ opts = OptionParser.new do |o|
|
|
51
74
|
# o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
52
75
|
# Bio::Log::CLI.trace(s)
|
53
76
|
# end
|
54
|
-
#
|
77
|
+
#
|
55
78
|
# o.on("-q", "--quiet", "Run quietly") do |q|
|
56
79
|
# Bio::Log::CLI.trace('error')
|
57
80
|
# end
|
58
|
-
#
|
81
|
+
#
|
59
82
|
# o.on("-v", "--verbose", "Run verbosely") do |v|
|
60
83
|
# Bio::Log::CLI.trace('info')
|
61
84
|
# end
|
62
|
-
#
|
85
|
+
#
|
63
86
|
# o.on("--debug", "Show debug messages") do |v|
|
64
87
|
# Bio::Log::CLI.trace('debug')
|
65
88
|
# end
|
@@ -73,7 +96,7 @@ end
|
|
73
96
|
begin
|
74
97
|
opts.parse!(ARGV)
|
75
98
|
|
76
|
-
# Uncomment the following when using the bio-logger
|
99
|
+
# Uncomment the following when using the bio-logger
|
77
100
|
# Bio::Log::CLI.configure('bio-publisci')
|
78
101
|
|
79
102
|
# TODO: your code here
|
data/examples/bio-band_integration.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'bio-band'
|
2
|
+
require 'bio-publisci'
|
3
|
+
|
4
|
+
f = open(File.dirname(__FILE__) + '/../resources/weather.numeric.arff')
|
5
|
+
clustering = Weka::Clusterer::SimpleKMeans::Base
|
6
|
+
clustering.set_options "-N 5"
|
7
|
+
clustering.set_data(Core::Parser::parse_ARFF(f.path))
|
8
|
+
clustered = clustering.new
|
9
|
+
puts clustered
|
data/examples/no_magic.prov
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
agent :publisci, subject: 'http://gsocsemantic.wordpress.com/publisci', type: "software"
|
2
|
+
agent :R, subject: "http://r-project.org"
|
3
|
+
agent :sciruby, subject: "http://sciruby.com", type: "organization"
|
4
|
+
|
5
|
+
plan :R_steps, steps: "spec/resource/example.Rhistory"
|
6
|
+
|
7
|
+
agent :Will do
|
8
|
+
subject "http://gsocsemantic.wordpress.com/me"
|
9
|
+
type "person"
|
10
|
+
name "Will Strinz"
|
11
|
+
on_behalf_of "http://sciruby.com"
|
12
|
+
end
|
13
|
+
|
14
|
+
entity :triplified_example, subject: "http://example.org/dataset/ex", generated_by: :triplify
|
15
|
+
|
16
|
+
entity :original do
|
17
|
+
generated_by :use_R
|
18
|
+
subject "http://example.org/R/ex"
|
19
|
+
source "./example.RData"
|
20
|
+
|
21
|
+
set "http://purl.org/dc/terms/title", "original data object"
|
22
|
+
end
|
23
|
+
|
24
|
+
activity :use_R do
|
25
|
+
generated :original
|
26
|
+
|
27
|
+
associated_with {
|
28
|
+
agent :R
|
29
|
+
}
|
30
|
+
|
31
|
+
associated_with :Will
|
32
|
+
end
|
33
|
+
|
34
|
+
activity :triplify do
|
35
|
+
generated "http://example.org/dataset/ex"
|
36
|
+
associated_with :publisci
|
37
|
+
used :original
|
38
|
+
end
|
39
|
+
|
40
|
+
generate_n3
|
data/examples/primer.prov
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# http://www.w3.org/TR/prov-primer/images/agents.png
|
2
|
+
|
3
|
+
organization :chartgen
|
4
|
+
|
5
|
+
agent :derek do
|
6
|
+
name "Derek"
|
7
|
+
on_behalf_of :chartgen
|
8
|
+
end
|
9
|
+
|
10
|
+
entity :chart1, attributed_to: :derek
|
11
|
+
entity :composition1
|
12
|
+
entity :regionList1
|
13
|
+
entity :dataSet1
|
14
|
+
|
15
|
+
activity :illustrate do
|
16
|
+
generated :chart1
|
17
|
+
associated_with :derek
|
18
|
+
used :composition1
|
19
|
+
end
|
20
|
+
|
21
|
+
activity :compose1 do
|
22
|
+
generated :composition1
|
23
|
+
associated_with :derek
|
24
|
+
used :regionList1
|
25
|
+
used :dataSet1
|
26
|
+
end
|
27
|
+
|
28
|
+
generate_n3(true)
|
data/examples/prov_dsl.prov
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
agent :publisci, type: "software"
|
2
|
+
agent :R, subject: "http://r-project.org"
|
3
|
+
|
4
|
+
plan :R_steps, steps: "spec/resource/example.Rhistory"
|
5
|
+
|
6
|
+
organization :sciruby, subject: "http://sciruby.com"
|
7
|
+
|
8
|
+
foaf = vocabulary "http://xmlns.com/foaf/0.1/"
|
9
|
+
|
10
|
+
agent :Will do
|
11
|
+
type "person"
|
12
|
+
name "Will Strinz"
|
13
|
+
on_behalf_of :sciruby
|
14
|
+
|
15
|
+
has foaf.mailbox, "wstrinz@gmail.com"
|
16
|
+
has foaf.homepage, "http://gsocsemantic.wordpress.com/"
|
17
|
+
end
|
18
|
+
|
19
|
+
data :triplified_example do
|
20
|
+
attributed_to :Will
|
21
|
+
derived_from do
|
22
|
+
entity :original
|
23
|
+
activity :triplify
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
activity :triplify do
|
28
|
+
generated :triplified_example
|
29
|
+
associated_with :publisci
|
30
|
+
used :original
|
31
|
+
end
|
32
|
+
|
33
|
+
data :original do
|
34
|
+
derived_from :field_work
|
35
|
+
attributed_to :R
|
36
|
+
end
|
37
|
+
|
38
|
+
data :field_work
|
39
|
+
|
40
|
+
activity :use_R do
|
41
|
+
generated :original
|
42
|
+
|
43
|
+
associated_with {
|
44
|
+
agent :R
|
45
|
+
plan :R_steps
|
46
|
+
}
|
47
|
+
|
48
|
+
associated_with :Will
|
49
|
+
end
|
50
|
+
|
51
|
+
generate_n3 true
|
data/features/create_generator.feature
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Feature: create generators
|
2
2
|
|
3
3
|
In order to check that objects conform to a common interface
|
4
|
-
I want to be able to call a generate method on various classes
|
4
|
+
I want to be able to call a generate method on various classes
|
5
5
|
|
6
6
|
Scenario: create a Dataframe generator
|
7
7
|
Given a Dataframe generator
|
@@ -9,17 +9,13 @@ Feature: create generators
|
|
9
9
|
|
10
10
|
Scenario: create a CSV generator
|
11
11
|
Given a CSV generator
|
12
|
-
Then I should be able to call its generate_n3 method
|
12
|
+
Then I should be able to call its generate_n3 method
|
13
13
|
|
14
14
|
Scenario: create a RMatrix generator
|
15
15
|
Given a RMatrix generator
|
16
16
|
Then I should be able to call its generate_n3 method
|
17
17
|
|
18
|
-
Scenario: create a
|
19
|
-
Given a
|
20
|
-
Then I should be able to call its generate_n3 method
|
21
|
-
|
22
|
-
Scenario: create a BigCross generator
|
23
|
-
Given a BigCross generator
|
24
|
-
Then I should be able to call its generate_n3 method
|
18
|
+
Scenario: create a RCross generator
|
19
|
+
Given a RCross generator
|
20
|
+
Then I should be able to call its generate_n3 method
|
25
21
|
|
data/features/integration_steps.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
2
1
|
Then(/^I should be able to cluster the result and print statistics$/) do
|
3
|
-
|
4
|
-
|
5
|
-
clustering
|
6
|
-
clustering.
|
7
|
-
f.
|
8
|
-
|
9
|
-
|
2
|
+
pending
|
3
|
+
# f=Tempfile.open('arff'); f.write @result; f.close
|
4
|
+
# clustering = Weka::Clusterer::SimpleKMeans::Base
|
5
|
+
# clustering.set_options "-N 5"
|
6
|
+
# clustering.set_data(Core::Parser::parse_ARFF(f.path))
|
7
|
+
# f.unlink
|
8
|
+
# clustered = clustering.new
|
9
|
+
# puts clustered
|
10
10
|
end
|
data/features/metadata.feature
CHANGED
@@ -15,10 +15,23 @@ Feature: Receive metadata as user input or extract from data sources
|
|
15
15
|
|
16
16
|
Scenario: Generate process information
|
17
17
|
Given a class which includes the Metadata module
|
18
|
-
|
18
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org', process: 'spec/resource/example.Rhistory'}
|
19
|
+
And the rdf dataset {resource:'http://example.org/data'}
|
20
|
+
When I call its provenance method with the source object and the rdf object
|
19
21
|
Then I should receive a metadata string
|
20
22
|
|
21
23
|
Scenario: Generate organizational provenance information
|
22
24
|
Given a class which includes the Metadata module
|
23
|
-
|
25
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', author: 'http://example.org/people/jrs', author_name: "J Random Scientist", organization: 'http://example.org/org/science', organization_name: "The League of Science" }
|
26
|
+
And the rdf dataset {resource:'http://example.org/data', author: 'http://gsocsemantic.wordpress.com/me', author_name: "Will Strinz", organization: 'http://sciruby.com/'}
|
27
|
+
When I call its provenance method with the source object and the rdf object
|
28
|
+
Then I should receive a metadata string
|
29
|
+
|
30
|
+
Scenario: Generate a provenance chain
|
31
|
+
Given a class which includes the Metadata module
|
32
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org'}
|
33
|
+
And the rdf dataset {resource:'http://example.org/data'}
|
34
|
+
And the chain object {resource: 'http://example.org/lab_results/1', author: 'http://example.org/people/brl', author_name: "B Random Labtech"}
|
35
|
+
And the chain object {resource: 'http://example.org/field_work/1', author: 'http://example.org/people/arn', organization: 'http://example.org/org/science'}
|
36
|
+
When I call its provenance method with the source object, the rdf object, and the chain
|
24
37
|
Then I should receive a metadata string
|
data/features/metadata_steps.rb
CHANGED
@@ -5,6 +5,27 @@ Given(/^a class which includes the Metadata module$/) do
|
|
5
5
|
@klass = Meta
|
6
6
|
end
|
7
7
|
|
8
|
+
Given(/^the source object (\{.+\})$/) do |fields|
|
9
|
+
@original = eval(fields)
|
10
|
+
end
|
11
|
+
|
12
|
+
Given(/^the rdf dataset (\{.+\})$/) do |fields|
|
13
|
+
@rdf = eval(fields)
|
14
|
+
end
|
15
|
+
|
16
|
+
Given(/^the chain object (\{.+\})$/) do |fields|
|
17
|
+
(@chain ||= []) << eval(fields)
|
18
|
+
end
|
19
|
+
|
20
|
+
When(/^I call its provenance method with the source object, the rdf object, and the chain$/) do
|
21
|
+
@response = @klass.new.provenance(@original, @rdf, @chain)
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
When(/^I call its provenance method with the source object and the rdf object$/) do
|
26
|
+
@response = @klass.new.provenance(@original, @rdf, nil)
|
27
|
+
end
|
28
|
+
|
8
29
|
When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
|
9
30
|
fields = eval(fields)
|
10
31
|
@response = @klass.new.basic(fields)
|
data/features/orm_steps.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative '../lib/bio-publisci.rb'
|
2
2
|
|
3
3
|
Given /^an ORM::DataCube entitled "(.*?)"$/ do |name|
|
4
|
-
@cube = R2RDF::
|
4
|
+
@cube = R2RDF::ORM::DataCube.new(name: name)
|
5
5
|
end
|
6
6
|
|
7
7
|
Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name, opts|
|
@@ -12,13 +12,13 @@ Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name
|
|
12
12
|
|
13
13
|
v = hash["value"]
|
14
14
|
v = v[1..-1].to_sym if k[0] == ":"
|
15
|
-
|
15
|
+
|
16
16
|
v = true if v =="true"
|
17
17
|
v = false if v =="false"
|
18
18
|
|
19
19
|
options_hash[k] = v
|
20
20
|
}
|
21
|
-
@cube = R2RDF::
|
21
|
+
@cube = R2RDF::ORM::DataCube.new(options_hash)
|
22
22
|
end
|
23
23
|
|
24
24
|
Given(/^a turtle string from file (.*)$/) do |file|
|
@@ -30,7 +30,7 @@ Given(/^the URI string "(.*?)"$/) do |uri|
|
|
30
30
|
end
|
31
31
|
|
32
32
|
When(/^I call the ORM::DataCube class method load on it$/) do
|
33
|
-
@cube = R2RDF::
|
33
|
+
@cube = R2RDF::ORM::DataCube.load(@string)
|
34
34
|
end
|
35
35
|
|
36
36
|
When /^I add a "(.*?)" dimension$/ do |dim|
|
@@ -70,5 +70,5 @@ Then /^the to_n3 method should return a string with a "(.*?)"$/ do |search|
|
|
70
70
|
end
|
71
71
|
|
72
72
|
Then(/^I should receive an ORM::DataCube object$/) do
|
73
|
-
@cube.is_a?(R2RDF::
|
73
|
+
@cube.is_a?(R2RDF::ORM::DataCube).should == true
|
74
74
|
end
|
data/features/prov_dsl.feature
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
Feature: Receive metadata as user input or extract from data sources
|
2
|
+
|
3
|
+
To generate clean provenance strings through a friendly interface
|
4
|
+
I want to use a DSL for the PROV ontology
|
5
|
+
|
6
|
+
Scenario: Generate without any magic (more open-world)
|
7
|
+
Given the prov DSL string from file examples/no_magic.prov
|
8
|
+
When I call Prov.run on it
|
9
|
+
Then I should receive a provenance string
|
10
|
+
|
11
|
+
Scenario: Generate from file
|
12
|
+
Given the prov DSL string from file examples/prov_dsl.prov
|
13
|
+
When I call Prov.run on it
|
14
|
+
Then I should receive a provenance string
|
data/features/prov_dsl_steps.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Given(/^the prov DSL string from file (.+)$/) do |file|
|
2
|
+
@dsl_string = file
|
3
|
+
end
|
4
|
+
|
5
|
+
When(/^I call Prov\.run on it$/) do
|
6
|
+
@turtle_string = PubliSci::Prov.run(@dsl_string)
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I should receive a provenance string$/) do
|
10
|
+
puts @turtle_string
|
11
|
+
end
|