bio-publisci 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +5 -5
- data/bin/bio-publisci +34 -11
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +40 -0
- data/examples/primer.prov +28 -0
- data/examples/prov_dsl.prov +51 -0
- data/features/create_generator.feature +5 -9
- data/features/integration_steps.rb +8 -8
- data/features/metadata.feature +15 -2
- data/features/metadata_steps.rb +21 -0
- data/features/orm_steps.rb +5 -5
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
- data/lib/bio-publisci/dataset/data_cube.rb +30 -26
- data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
- data/lib/bio-publisci/metadata/metadata.rb +180 -42
- data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
- data/lib/bio-publisci/metadata/prov/association.rb +73 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
- data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
- data/lib/bio-publisci/metadata/prov/element.rb +52 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +17 -8
- data/lib/bio-publisci/readers/csv.rb +9 -7
- data/lib/bio-publisci/readers/dataframe.rb +9 -8
- data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
- data/lib/bio-publisci/readers/r_matrix.rb +37 -13
- data/lib/bio-publisci/spira.rb +82 -0
- data/lib/bio-publisci/writers/dataframe.rb +65 -65
- data/lib/bio-publisci.rb +9 -4
- data/spec/ORM/data_cube_orm_spec.rb +3 -3
- data/spec/dataset_for_spec.rb +29 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +14 -5
- metadata +42 -8
- data/lib/bio-publisci/readers/cross.rb +0 -72
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3cb588f4ec6cfa77e1e9011b687ca3e148c6ca6e
|
4
|
+
data.tar.gz: 6a257692f2317eaea75f0fe306b93e01c1de6a70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 163459c5c65c023475a5cb9853c7300d8b5f2a0c4afdf8a98a9f7f7f61ba9103c15e562f01c08a19f7c2d08853ce31447b0710c29e14d9c6f954705f82c26bba
|
7
|
+
data.tar.gz: 6e07fbb66df785ef3d15820d825823fdcd7ee18fe60cd68f8009f9f92c3204fc8945c3e0ac3bf79663b66d1259036da4f6d2749bac2d34f8870c73c25d850cbc
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -17,11 +17,11 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "bio-publisci"
|
18
18
|
gem.homepage = "http://github.com/wstrinz/bioruby-publisci"
|
19
19
|
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{Publish
|
21
|
-
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
|
20
|
+
gem.summary = %Q{Publish scientific results to the semantic web}
|
21
|
+
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF, OWL, and related technologies }
|
22
22
|
gem.email = "wstrinz@gmail.com"
|
23
|
-
gem.authors = ["
|
24
|
-
gem.version = "0.0.
|
23
|
+
gem.authors = ["Will Strinz"]
|
24
|
+
gem.version = "0.0.4"
|
25
25
|
|
26
26
|
# dependencies defined in Gemfile
|
27
27
|
end
|
@@ -50,7 +50,7 @@ task :default => [] do
|
|
50
50
|
end
|
51
51
|
|
52
52
|
task :test => [] do
|
53
|
-
begin
|
53
|
+
begin
|
54
54
|
Rake::Task[:spec].invoke
|
55
55
|
rescue
|
56
56
|
end
|
data/bin/bio-publisci
CHANGED
@@ -4,25 +4,48 @@
|
|
4
4
|
# Author:: wstrinz
|
5
5
|
# Copyright:: 2013
|
6
6
|
|
7
|
-
|
7
|
+
version = "0.0.4" #File.new(VERSION_FILENAME).read.chomp
|
8
|
+
|
9
|
+
USAGE = <<-EOF
|
10
|
+
bio-publisci #{version} by Will Strinz 2013
|
11
|
+
|
12
|
+
Usage:
|
13
|
+
|
14
|
+
bio-publisci file
|
15
|
+
(execute PROV dsl file)
|
16
|
+
EOF
|
8
17
|
|
9
18
|
gempath = File.dirname(File.dirname(__FILE__))
|
10
19
|
$: << File.join(gempath,'lib')
|
20
|
+
require 'bio-publisci'
|
11
21
|
|
12
|
-
VERSION_FILENAME=File.join(gempath,'VERSION')
|
13
|
-
version = File.new(VERSION_FILENAME).read.chomp
|
22
|
+
#VERSION_FILENAME=File.join(gempath,'VERSION')
|
14
23
|
|
15
24
|
# print banner
|
16
|
-
print "bio-publisci #{version} by wstrinz 2013\n"
|
17
25
|
|
18
26
|
if ARGV.size == 0
|
19
27
|
print USAGE
|
28
|
+
elsif ARGV.size == 1
|
29
|
+
#assume file, run DSL (prov for now)
|
30
|
+
if File.exist? ARGV[0]
|
31
|
+
puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
|
32
|
+
else
|
33
|
+
puts "(no file #{ARGV[0]})"
|
34
|
+
print USAGE
|
35
|
+
end
|
36
|
+
else
|
37
|
+
if ARGV.size % 2 == 0
|
38
|
+
opts=Hash[*ARGV]
|
39
|
+
puts "got options #{opts}"
|
40
|
+
else
|
41
|
+
print USAGE
|
42
|
+
end
|
20
43
|
end
|
21
44
|
|
22
45
|
require 'bio-publisci'
|
23
46
|
require 'optparse'
|
24
47
|
|
25
|
-
# Uncomment when using the bio-logger
|
48
|
+
# Uncomment when using the bio-logger
|
26
49
|
# require 'bio-logger'
|
27
50
|
# Bio::Log::CLI.logger('stderr')
|
28
51
|
# Bio::Log::CLI.trace('info')
|
@@ -35,14 +58,14 @@ opts = OptionParser.new do |o|
|
|
35
58
|
# TODO: your logic here, below an example
|
36
59
|
options[:example_parameter] = 'this is a parameter'
|
37
60
|
end
|
38
|
-
|
61
|
+
|
39
62
|
o.separator ""
|
40
63
|
o.on("--switch-example", 'TODO: put a description for the SWITCH') do
|
41
64
|
# TODO: your logic here, below an example
|
42
65
|
self[:example_switch] = true
|
43
66
|
end
|
44
67
|
|
45
|
-
# Uncomment the following when using the bio-logger
|
68
|
+
# Uncomment the following when using the bio-logger
|
46
69
|
# o.separator ""
|
47
70
|
# o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
48
71
|
# Bio::Log::CLI.logger(name)
|
@@ -51,15 +74,15 @@ opts = OptionParser.new do |o|
|
|
51
74
|
# o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
52
75
|
# Bio::Log::CLI.trace(s)
|
53
76
|
# end
|
54
|
-
#
|
77
|
+
#
|
55
78
|
# o.on("-q", "--quiet", "Run quietly") do |q|
|
56
79
|
# Bio::Log::CLI.trace('error')
|
57
80
|
# end
|
58
|
-
#
|
81
|
+
#
|
59
82
|
# o.on("-v", "--verbose", "Run verbosely") do |v|
|
60
83
|
# Bio::Log::CLI.trace('info')
|
61
84
|
# end
|
62
|
-
#
|
85
|
+
#
|
63
86
|
# o.on("--debug", "Show debug messages") do |v|
|
64
87
|
# Bio::Log::CLI.trace('debug')
|
65
88
|
# end
|
@@ -73,7 +96,7 @@ end
|
|
73
96
|
begin
|
74
97
|
opts.parse!(ARGV)
|
75
98
|
|
76
|
-
# Uncomment the following when using the bio-logger
|
99
|
+
# Uncomment the following when using the bio-logger
|
77
100
|
# Bio::Log::CLI.configure('bio-publisci')
|
78
101
|
|
79
102
|
# TODO: your code here
|
data/examples/bio-band_integration.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'bio-band'
|
2
|
+
require 'bio-publisci'
|
3
|
+
|
4
|
+
f = open(File.dirname(__FILE__) + '/../resources/weather.numeric.arff')
|
5
|
+
clustering = Weka::Clusterer::SimpleKMeans::Base
|
6
|
+
clustering.set_options "-N 5"
|
7
|
+
clustering.set_data(Core::Parser::parse_ARFF(f.path))
|
8
|
+
clustered = clustering.new
|
9
|
+
puts clustered
|
data/examples/no_magic.prov
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
agent :publisci, subject: 'http://gsocsemantic.wordpress.com/publisci', type: "software"
|
2
|
+
agent :R, subject: "http://r-project.org"
|
3
|
+
agent :sciruby, subject: "http://sciruby.com", type: "organization"
|
4
|
+
|
5
|
+
plan :R_steps, steps: "spec/resource/example.Rhistory"
|
6
|
+
|
7
|
+
agent :Will do
|
8
|
+
subject "http://gsocsemantic.wordpress.com/me"
|
9
|
+
type "person"
|
10
|
+
name "Will Strinz"
|
11
|
+
on_behalf_of "http://sciruby.com"
|
12
|
+
end
|
13
|
+
|
14
|
+
entity :triplified_example, subject: "http://example.org/dataset/ex", generated_by: :triplify
|
15
|
+
|
16
|
+
entity :original do
|
17
|
+
generated_by :use_R
|
18
|
+
subject "http://example.org/R/ex"
|
19
|
+
source "./example.RData"
|
20
|
+
|
21
|
+
set "http://purl.org/dc/terms/title", "original data object"
|
22
|
+
end
|
23
|
+
|
24
|
+
activity :use_R do
|
25
|
+
generated :original
|
26
|
+
|
27
|
+
associated_with {
|
28
|
+
agent :R
|
29
|
+
}
|
30
|
+
|
31
|
+
associated_with :Will
|
32
|
+
end
|
33
|
+
|
34
|
+
activity :triplify do
|
35
|
+
generated "http://example.org/dataset/ex"
|
36
|
+
associated_with :publisci
|
37
|
+
used :original
|
38
|
+
end
|
39
|
+
|
40
|
+
generate_n3
|
data/examples/primer.prov
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# http://www.w3.org/TR/prov-primer/images/agents.png
|
2
|
+
|
3
|
+
organization :chartgen
|
4
|
+
|
5
|
+
agent :derek do
|
6
|
+
name "Derek"
|
7
|
+
on_behalf_of :chartgen
|
8
|
+
end
|
9
|
+
|
10
|
+
entity :chart1, attributed_to: :derek
|
11
|
+
entity :composition1
|
12
|
+
entity :regionList1
|
13
|
+
entity :dataSet1
|
14
|
+
|
15
|
+
activity :illustrate do
|
16
|
+
generated :chart1
|
17
|
+
associated_with :derek
|
18
|
+
used :composition1
|
19
|
+
end
|
20
|
+
|
21
|
+
activity :compose1 do
|
22
|
+
generated :composition1
|
23
|
+
associated_with :derek
|
24
|
+
used :regionList1
|
25
|
+
used :dataSet1
|
26
|
+
end
|
27
|
+
|
28
|
+
generate_n3(true)
|
data/examples/prov_dsl.prov
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
agent :publisci, type: "software"
|
2
|
+
agent :R, subject: "http://r-project.org"
|
3
|
+
|
4
|
+
plan :R_steps, steps: "spec/resource/example.Rhistory"
|
5
|
+
|
6
|
+
organization :sciruby, subject: "http://sciruby.com"
|
7
|
+
|
8
|
+
foaf = vocabulary "http://xmlns.com/foaf/0.1/"
|
9
|
+
|
10
|
+
agent :Will do
|
11
|
+
type "person"
|
12
|
+
name "Will Strinz"
|
13
|
+
on_behalf_of :sciruby
|
14
|
+
|
15
|
+
has foaf.mailbox, "wstrinz@gmail.com"
|
16
|
+
has foaf.homepage, "http://gsocsemantic.wordpress.com/"
|
17
|
+
end
|
18
|
+
|
19
|
+
data :triplified_example do
|
20
|
+
attributed_to :Will
|
21
|
+
derived_from do
|
22
|
+
entity :original
|
23
|
+
activity :triplify
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
activity :triplify do
|
28
|
+
generated :triplified_example
|
29
|
+
associated_with :publisci
|
30
|
+
used :original
|
31
|
+
end
|
32
|
+
|
33
|
+
data :original do
|
34
|
+
derived_from :field_work
|
35
|
+
attributed_to :R
|
36
|
+
end
|
37
|
+
|
38
|
+
data :field_work
|
39
|
+
|
40
|
+
activity :use_R do
|
41
|
+
generated :original
|
42
|
+
|
43
|
+
associated_with {
|
44
|
+
agent :R
|
45
|
+
plan :R_steps
|
46
|
+
}
|
47
|
+
|
48
|
+
associated_with :Will
|
49
|
+
end
|
50
|
+
|
51
|
+
generate_n3 true
|
data/features/create_generator.feature
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Feature: create generators
|
2
2
|
|
3
3
|
In order to check that objects conform to a common interface
|
4
|
-
I want to be able to call a generate method on various classes
|
4
|
+
I want to be able to call a generate method on various classes
|
5
5
|
|
6
6
|
Scenario: create a Dataframe generator
|
7
7
|
Given a Dataframe generator
|
@@ -9,17 +9,13 @@ Feature: create generators
|
|
9
9
|
|
10
10
|
Scenario: create a CSV generator
|
11
11
|
Given a CSV generator
|
12
|
-
Then I should be able to call its generate_n3 method
|
12
|
+
Then I should be able to call its generate_n3 method
|
13
13
|
|
14
14
|
Scenario: create a RMatrix generator
|
15
15
|
Given a RMatrix generator
|
16
16
|
Then I should be able to call its generate_n3 method
|
17
17
|
|
18
|
-
Scenario: create a
|
19
|
-
Given a
|
20
|
-
Then I should be able to call its generate_n3 method
|
21
|
-
|
22
|
-
Scenario: create a BigCross generator
|
23
|
-
Given a BigCross generator
|
24
|
-
Then I should be able to call its generate_n3 method
|
18
|
+
Scenario: create a RCross generator
|
19
|
+
Given a RCross generator
|
20
|
+
Then I should be able to call its generate_n3 method
|
25
21
|
|
data/features/integration_steps.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
|
2
1
|
Then(/^I should be able to cluster the result and print statistics$/) do
|
3
|
-
|
4
|
-
|
5
|
-
clustering
|
6
|
-
clustering.
|
7
|
-
f.
|
8
|
-
|
9
|
-
|
2
|
+
pending
|
3
|
+
# f=Tempfile.open('arff'); f.write @result; f.close
|
4
|
+
# clustering = Weka::Clusterer::SimpleKMeans::Base
|
5
|
+
# clustering.set_options "-N 5"
|
6
|
+
# clustering.set_data(Core::Parser::parse_ARFF(f.path))
|
7
|
+
# f.unlink
|
8
|
+
# clustered = clustering.new
|
9
|
+
# puts clustered
|
10
10
|
end
|
data/features/metadata.feature
CHANGED
@@ -15,10 +15,23 @@ Feature: Receive metadata as user input or extract from data sources
|
|
15
15
|
|
16
16
|
Scenario: Generate process information
|
17
17
|
Given a class which includes the Metadata module
|
18
|
-
|
18
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org', process: 'spec/resource/example.Rhistory'}
|
19
|
+
And the rdf dataset {resource:'http://example.org/data'}
|
20
|
+
When I call its provenance method with the source object and the rdf object
|
19
21
|
Then I should receive a metadata string
|
20
22
|
|
21
23
|
Scenario: Generate organizational provenance information
|
22
24
|
Given a class which includes the Metadata module
|
23
|
-
|
25
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', author: 'http://example.org/people/jrs', author_name: "J Random Scientist", organization: 'http://example.org/org/science', organization_name: "The League of Science" }
|
26
|
+
And the rdf dataset {resource:'http://example.org/data', author: 'http://gsocsemantic.wordpress.com/me', author_name: "Will Strinz", organization: 'http://sciruby.com/'}
|
27
|
+
When I call its provenance method with the source object and the rdf object
|
28
|
+
Then I should receive a metadata string
|
29
|
+
|
30
|
+
Scenario: Generate a provenance chain
|
31
|
+
Given a class which includes the Metadata module
|
32
|
+
And the source object {resource: 'http://example.org/software/R/var/ex', software:'http://r-project.org'}
|
33
|
+
And the rdf dataset {resource:'http://example.org/data'}
|
34
|
+
And the chain object {resource: 'http://example.org/lab_results/1', author: 'http://example.org/people/brl', author_name: "B Random Labtech"}
|
35
|
+
And the chain object {resource: 'http://example.org/field_work/1', author: 'http://example.org/people/arn', organization: 'http://example.org/org/science'}
|
36
|
+
When I call its provenance method with the source object, the rdf object, and the chain
|
24
37
|
Then I should receive a metadata string
|
data/features/metadata_steps.rb
CHANGED
@@ -5,6 +5,27 @@ Given(/^a class which includes the Metadata module$/) do
|
|
5
5
|
@klass = Meta
|
6
6
|
end
|
7
7
|
|
8
|
+
Given(/^the source object (\{.+\})$/) do |fields|
|
9
|
+
@original = eval(fields)
|
10
|
+
end
|
11
|
+
|
12
|
+
Given(/^the rdf dataset (\{.+\})$/) do |fields|
|
13
|
+
@rdf = eval(fields)
|
14
|
+
end
|
15
|
+
|
16
|
+
Given(/^the chain object (\{.+\})$/) do |fields|
|
17
|
+
(@chain ||= []) << eval(fields)
|
18
|
+
end
|
19
|
+
|
20
|
+
When(/^I call its provenance method with the source object, the rdf object, and the chain$/) do
|
21
|
+
@response = @klass.new.provenance(@original, @rdf, @chain)
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
When(/^I call its provenance method with the source object and the rdf object$/) do
|
26
|
+
@response = @klass.new.provenance(@original, @rdf, nil)
|
27
|
+
end
|
28
|
+
|
8
29
|
When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
|
9
30
|
fields = eval(fields)
|
10
31
|
@response = @klass.new.basic(fields)
|
data/features/orm_steps.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative '../lib/bio-publisci.rb'
|
2
2
|
|
3
3
|
Given /^an ORM::DataCube entitled "(.*?)"$/ do |name|
|
4
|
-
@cube = R2RDF::
|
4
|
+
@cube = R2RDF::ORM::DataCube.new(name: name)
|
5
5
|
end
|
6
6
|
|
7
7
|
Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name, opts|
|
@@ -12,13 +12,13 @@ Given /^an ORM::DataCube entitled "(.*?)" with the following options:$/ do |name
|
|
12
12
|
|
13
13
|
v = hash["value"]
|
14
14
|
v = v[1..-1].to_sym if k[0] == ":"
|
15
|
-
|
15
|
+
|
16
16
|
v = true if v =="true"
|
17
17
|
v = false if v =="false"
|
18
18
|
|
19
19
|
options_hash[k] = v
|
20
20
|
}
|
21
|
-
@cube = R2RDF::
|
21
|
+
@cube = R2RDF::ORM::DataCube.new(options_hash)
|
22
22
|
end
|
23
23
|
|
24
24
|
Given(/^a turtle string from file (.*)$/) do |file|
|
@@ -30,7 +30,7 @@ Given(/^the URI string "(.*?)"$/) do |uri|
|
|
30
30
|
end
|
31
31
|
|
32
32
|
When(/^I call the ORM::DataCube class method load on it$/) do
|
33
|
-
@cube = R2RDF::
|
33
|
+
@cube = R2RDF::ORM::DataCube.load(@string)
|
34
34
|
end
|
35
35
|
|
36
36
|
When /^I add a "(.*?)" dimension$/ do |dim|
|
@@ -70,5 +70,5 @@ Then /^the to_n3 method should return a string with a "(.*?)"$/ do |search|
|
|
70
70
|
end
|
71
71
|
|
72
72
|
Then(/^I should receive an ORM::DataCube object$/) do
|
73
|
-
@cube.is_a?(R2RDF::
|
73
|
+
@cube.is_a?(R2RDF::ORM::DataCube).should == true
|
74
74
|
end
|
data/features/prov_dsl.feature
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
Feature: Receive metadata as user input or extract from data sources
|
2
|
+
|
3
|
+
To generate clean provenance strings through a friendly interface
|
4
|
+
I want to use a DSL for the PROV ontology
|
5
|
+
|
6
|
+
Scenario: Generate without any magic (more open-world)
|
7
|
+
Given the prov DSL string from file examples/no_magic.prov
|
8
|
+
When I call Prov.run on it
|
9
|
+
Then I should receive a provenance string
|
10
|
+
|
11
|
+
Scenario: Generate from file
|
12
|
+
Given the prov DSL string from file examples/prov_dsl.prov
|
13
|
+
When I call Prov.run on it
|
14
|
+
Then I should receive a provenance string
|
data/features/prov_dsl_steps.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Given(/^the prov DSL string from file (.+)$/) do |file|
|
2
|
+
@dsl_string = file
|
3
|
+
end
|
4
|
+
|
5
|
+
When(/^I call Prov\.run on it$/) do
|
6
|
+
@turtle_string = PubliSci::Prov.run(@dsl_string)
|
7
|
+
end
|
8
|
+
|
9
|
+
Then(/^I should receive a provenance string$/) do
|
10
|
+
puts @turtle_string
|
11
|
+
end
|