bio-publisci 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +1 -1
- data/examples/prov_dsl.prov +2 -1
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/metadata_steps.rb +2 -4
- data/features/orm_steps.rb +4 -4
- data/features/reader_steps.rb +1 -1
- data/features/store_steps.rb +1 -1
- data/features/writer.feature +1 -1
- data/features/writer_steps.rb +1 -1
- data/lib/bio-publisci.rb +10 -2
- data/lib/bio-publisci/analyzer.rb +4 -4
- data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +28 -17
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
- data/lib/bio-publisci/dataset/interactive.rb +1 -1
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
- data/lib/bio-publisci/dsl/dsl.rb +69 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +3 -314
- data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
- data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
- data/lib/bio-publisci/metadata/prov/association.rb +2 -2
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
- data/lib/bio-publisci/metadata/prov/element.rb +2 -2
- data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
- data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
- data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
- data/lib/bio-publisci/metadata/prov/role.rb +1 -1
- data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/output.rb +1 -1
- data/lib/bio-publisci/parser.rb +1 -1
- data/lib/bio-publisci/query/query_helper.rb +14 -14
- data/lib/bio-publisci/r_client.rb +5 -5
- data/lib/bio-publisci/readers/arff.rb +5 -5
- data/lib/bio-publisci/readers/csv.rb +3 -3
- data/lib/bio-publisci/readers/dataframe.rb +3 -3
- data/lib/bio-publisci/readers/r_cross.rb +4 -4
- data/lib/bio-publisci/readers/r_matrix.rb +3 -3
- data/lib/bio-publisci/store.rb +3 -3
- data/lib/bio-publisci/writers/arff.rb +6 -6
- data/lib/bio-publisci/writers/dataframe.rb +5 -5
- data/scripts/islet_mlratio.rb +1 -1
- data/scripts/scan_islet.rb +1 -1
- data/scripts/update_reference.rb +2 -2
- data/spec/ORM/data_cube_orm_spec.rb +2 -2
- data/spec/ORM/prov_model_spec.rb +19 -0
- data/spec/analyzer_spec.rb +7 -7
- data/spec/data_cube_spec.rb +13 -13
- data/spec/dataset_for_spec.rb +11 -4
- data/spec/dsl_spec.rb +90 -0
- data/spec/generators/csv_spec.rb +4 -4
- data/spec/generators/dataframe_spec.rb +6 -6
- data/spec/generators/r_cross_spec.rb +2 -2
- data/spec/generators/r_matrix_spec.rb +2 -2
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +4 -4
- data/spec/prov/agent_spec.rb +3 -4
- data/spec/prov/association_spec.rb +1 -2
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +3 -4
- data/spec/prov/role_spec.rb +1 -2
- data/spec/prov/usage_spec.rb +1 -2
- data/spec/r_builder_spec.rb +3 -3
- data/spec/turtle/bacon +20 -4
- data/spec/turtle/reference +20 -4
- metadata +37 -4
@@ -1,17 +1,17 @@
|
|
1
|
-
module
|
1
|
+
module PubliSci
|
2
2
|
module ORM
|
3
3
|
class DataCube
|
4
|
-
extend
|
5
|
-
extend
|
6
|
-
extend
|
7
|
-
extend
|
8
|
-
extend
|
9
|
-
|
10
|
-
include
|
11
|
-
include
|
12
|
-
include
|
13
|
-
include
|
14
|
-
include
|
4
|
+
extend PubliSci::Dataset::DataCube
|
5
|
+
extend PubliSci::Analyzer
|
6
|
+
# extend PubliSci::Metadata
|
7
|
+
extend PubliSci::Query
|
8
|
+
extend PubliSci::Parser
|
9
|
+
|
10
|
+
include PubliSci::Dataset::DataCube
|
11
|
+
include PubliSci::Analyzer
|
12
|
+
include PubliSci::Metadata::Generator
|
13
|
+
include PubliSci::Query
|
14
|
+
include PubliSci::Parser
|
15
15
|
|
16
16
|
attr_accessor :labels
|
17
17
|
attr_accessor :dimensions
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module PubliSci
  class Dataset
    # Holds per-dataset settings. For every key in +defaults+ the class
    # defines a combined reader/setter (+config.key+ reads, +config.key(value)+
    # assigns and returns the new value) plus a plain +key=+ writer.
    class Configuration
      # @return [Hash{Symbol => Object}] setting names mapped to their
      #   default values
      def self.defaults
        {
          interactive: false,
        }
      end

      defaults.each do |key, default|
        ivar = :"@#{key}"

        # Reader/setter for +key+. A nil argument means "just read".
        # Only nil (never false) is treated as "unset", so an explicit
        # +false+ can be stored and is not replaced by the default.
        define_method(key) do |input = nil|
          instance_variable_set(ivar, input) unless input.nil?
          instance_variable_set(ivar, default) if instance_variable_get(ivar).nil?
          instance_variable_get(ivar)
        end

        attr_writer key
      end
    end
  end
end
|
@@ -5,10 +5,10 @@ class String
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
module
|
8
|
+
module PubliSci
|
9
9
|
class Dataset
|
10
10
|
module DataCube
|
11
|
-
include
|
11
|
+
include PubliSci::Parser
|
12
12
|
def defaults
|
13
13
|
{
|
14
14
|
type: :dataframe,
|
@@ -60,6 +60,11 @@ module R2RDF
|
|
60
60
|
[newm, newd, newc]
|
61
61
|
end
|
62
62
|
|
63
|
+
# Maps property resource names to their component-specification names.
#
# Each name has its "prop:" prefix replaced by "cs:". A full URI of the
# form <base_url/.../name> at the end of the string is rewritten to
# "cs:name".
#
# @param args [String, Array<String>] one name or a (possibly nested)
#   list of names
# @param options [Hash] only :base_url is read here
# @return [Array<String>] the rewritten names, in input order
def component_gen(args,options={})
  # Escape the base URL so its dots (and any other metacharacters) match
  # literally; build the pattern once instead of once per element.
  base = Regexp.escape(options[:base_url].to_s)
  resource = %r{<#{base}/.+/(\w.+)>$}

  Array[args].flatten.map { |arg| arg.gsub("prop:", "cs:").gsub(resource, 'cs:\1') }
end
|
67
|
+
|
63
68
|
def encode_data(codes,data,var,options={})
|
64
69
|
codes = sanitize(codes)
|
65
70
|
new_data = {}
|
@@ -101,7 +106,7 @@ module R2RDF
|
|
101
106
|
str = prefixes(var,options)
|
102
107
|
str << data_structure_definition(measures, dimensions, codes, var, options)
|
103
108
|
str << dataset(var, options)
|
104
|
-
|
109
|
+
component_specifications(measures, dimensions, codes, var, options).map{ |c| str << c }
|
105
110
|
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
106
111
|
measure_properties(measures, var, options).map{|p| str << p}
|
107
112
|
code_lists(codes, data, var, options).map{|l| str << l}
|
@@ -138,14 +143,15 @@ module R2RDF
|
|
138
143
|
var = sanitize([var]).first
|
139
144
|
options = defaults().merge(options)
|
140
145
|
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
141
|
-
|
146
|
+
cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
|
147
|
+
cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
|
142
148
|
str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
|
143
|
-
|
144
|
-
str << " qb:component
|
149
|
+
cs_dims.map{|d|
|
150
|
+
str << " qb:component #{d} ;\n"
|
145
151
|
}
|
146
152
|
|
147
|
-
|
148
|
-
str << " qb:component
|
153
|
+
cs_meas.map{|m|
|
154
|
+
str << " qb:component #{m} ;\n"
|
149
155
|
}
|
150
156
|
str[-2]='.'
|
151
157
|
str<<"\n"
|
@@ -163,24 +169,29 @@ module R2RDF
|
|
163
169
|
EOF
|
164
170
|
end
|
165
171
|
|
166
|
-
def component_specifications(measure_names, dimension_names, var, options={})
|
172
|
+
def component_specifications(measure_names, dimension_names, codes, var, options={})
|
167
173
|
options = defaults().merge(options)
|
174
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
|
175
|
+
cs_dims = component_gen(rdf_dimensions,options)
|
176
|
+
cs_meas = component_gen(rdf_measures,options)
|
177
|
+
# cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
|
178
|
+
# cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')}
|
168
179
|
specs = []
|
169
180
|
|
170
|
-
|
181
|
+
rdf_dimensions.each_with_index.map{|d,i|
|
171
182
|
specs << <<-EOF.unindent
|
172
|
-
|
173
|
-
rdfs:label "#{
|
174
|
-
qb:dimension
|
183
|
+
#{cs_dims[i]} a qb:ComponentSpecification ;
|
184
|
+
rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ;
|
185
|
+
qb:dimension #{d} .
|
175
186
|
|
176
187
|
EOF
|
177
188
|
}
|
178
189
|
|
179
|
-
|
190
|
+
rdf_measures.each_with_index.map{|n,i|
|
180
191
|
specs << <<-EOF.unindent
|
181
|
-
|
182
|
-
rdfs:label "#{
|
183
|
-
qb:measure
|
192
|
+
#{cs_meas[i]} a qb:ComponentSpecification ;
|
193
|
+
rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ;
|
194
|
+
qb:measure #{n} .
|
184
195
|
|
185
196
|
EOF
|
186
197
|
}
|
@@ -1,6 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'open-uri'
|
2
|
+
module PubliSci
|
2
3
|
class Dataset
|
3
|
-
extend
|
4
|
+
extend PubliSci::Interactive
|
4
5
|
|
5
6
|
def self.for(object, options={}, ask_on_ambiguous=true)
|
6
7
|
|
@@ -23,10 +24,12 @@ module R2RDF
|
|
23
24
|
when ".RData"
|
24
25
|
r_object(object, options, ask_on_ambiguous)
|
25
26
|
when /.csv/i
|
26
|
-
|
27
|
+
PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
|
27
28
|
end
|
29
|
+
elsif object =~ %r{http[s]://.+}
|
30
|
+
self.for(download(object).path, options, ask_on_ambiguous)
|
28
31
|
else
|
29
|
-
raise "Unable to find reader for File or String"
|
32
|
+
raise "Unable to find reader for File or String #{object}"
|
30
33
|
end
|
31
34
|
elsif object.is_a? Rserve::REXP
|
32
35
|
r_object(object, options, ask_on_ambiguous)
|
@@ -35,6 +38,13 @@ module R2RDF
|
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
41
|
+
# Fetches a remote resource into a temporary file.
#
# @param uri [String] URL of the resource to fetch
# @return [Tempfile] the closed temp file holding the response body; use
#   +#path+ to reach it on disk
# @raise whatever URI parsing or the open-uri request raises; the temp
#   file is removed before the error propagates
def self.download(uri)
  name = uri.split('/').last
  ext = File.extname(name)

  # Passing [basename, extension] keeps the extension at the END of the
  # temp path (Tempfile otherwise appends its random suffix after it).
  # NOTE(review): presumably the caller dispatches on the file extension
  # (see the ".RData" / .csv branches) - confirm.
  out = Tempfile.new([File.basename(name, ext), ext])
  begin
    out.binmode # payload may be binary (e.g. .RData)
    # URI#open (from open-uri) instead of Kernel#open: it never treats the
    # string as a local path or pipe, and the block form closes the handle.
    URI.parse(uri).open { |remote| out.write(remote.read) }
    out.close
  rescue
    out.close!
    raise
  end
  out
end
|
47
|
+
|
38
48
|
def self.r_object(object, options={}, ask_on_ambiguous=true)
|
39
49
|
if object.is_a? String
|
40
50
|
con = Rserve::Connection.new
|
@@ -49,7 +59,7 @@ module R2RDF
|
|
49
59
|
r_classes = con.eval("class(#{var})").to_ruby
|
50
60
|
|
51
61
|
if r_classes.include? "data.frame"
|
52
|
-
df =
|
62
|
+
df = PubliSci::Reader::Dataframe.new
|
53
63
|
unless options[:dimensions] || !ask_on_ambiguous
|
54
64
|
dims = con.eval("names(#{var})").to_ruby
|
55
65
|
puts "Which dimensions? #{dims}"
|
@@ -70,7 +80,7 @@ module R2RDF
|
|
70
80
|
df.generate_n3(con.eval(var),var,options)
|
71
81
|
|
72
82
|
elsif r_classes.include? "cross"
|
73
|
-
bc =
|
83
|
+
bc = PubliSci::Reader::RCross.new
|
74
84
|
|
75
85
|
unless options[:measures] || !ask_on_ambiguous
|
76
86
|
pheno_names = con.eval("names(#{var}$pheno)").to_ruby
|
@@ -91,7 +101,7 @@ module R2RDF
|
|
91
101
|
bc.generate_n3(con, var, base, options)
|
92
102
|
|
93
103
|
elsif r_classes.include? "matrix"
|
94
|
-
mat =
|
104
|
+
mat = PubliSci::Reader::RMatrix.new
|
95
105
|
|
96
106
|
unless options[:measures] || !ask_on_ambiguous
|
97
107
|
puts "Row label"
|
@@ -118,13 +128,13 @@ module R2RDF
|
|
118
128
|
|
119
129
|
mat.generate_n3(con, var, base, options)
|
120
130
|
else
|
121
|
-
raise "no
|
131
|
+
raise "no PubliSci::Reader found for #{r_classes}"
|
122
132
|
end
|
123
133
|
|
124
134
|
elsif object.is_a? Rserve::REXP
|
125
135
|
if object.attr.payload["class"].payload.first
|
126
136
|
|
127
|
-
df =
|
137
|
+
df = PubliSci::Reader::Dataframe.new
|
128
138
|
|
129
139
|
var = nil
|
130
140
|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module PubliSci
  module DSL
    # Settings for a DSL script. For every key in +defaults+ the class
    # defines a combined reader/setter (+config.key+ reads,
    # +config.key(value)+ assigns and returns the new value) plus a plain
    # +key=+ writer.
    class Configuration
      # @return [Hash{Symbol => Object}] setting names mapped to their
      #   default values
      def self.defaults
        {
          output: :generate_n3,
          abbreviate: false,
          repository: :in_memory,
          repository_url: 'http://localhost:8080/'
        }
      end

      defaults.each do |key, default|
        ivar = :"@#{key}"

        # Reader/setter for +key+. A nil argument means "just read".
        # Only nil (never false) is treated as "unset", so an explicit
        # +false+ can be stored and is not replaced by the default.
        define_method(key) do |input = nil|
          instance_variable_set(ivar, input) unless input.nil?
          instance_variable_set(ivar, default) if instance_variable_get(ivar).nil?
          instance_variable_get(ivar)
        end

        attr_writer key
      end
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module PubliSci
  class Dataset
    # Mixin providing the dataset-description vocabulary (source object,
    # dimensions, measures, generator options) and the call that hands the
    # collected description to Dataset.for.
    module DSL

      # Stand-alone receiver for the DSL; creating one clears Dataset.registry.
      class Instance
        include Dataset::DSL

        def initialize
          Dataset.registry.clear
        end
      end

      # def interactive(value=nil)
      #   set_or_get('interactive',value)
      # end

      # With an argument, stores it as the source object; without one (or
      # with a falsy one), returns the stored value.
      def object(file=nil)
        set_or_get('object',file)
      end
      alias_method :source, :object

      # No arguments: returns the collected dimensions (nil if none yet).
      # With arguments: appends each one and returns the argument list.
      def dimension(*args)
        return add_or_get('dimension', nil) if args.empty?

        args.each { |name| add_or_get('dimension', name) }
      end

      # No arguments: returns the collected measures (nil if none yet).
      # With arguments: appends each one and returns the argument list.
      def measure(*args)
        return add_or_get('measure', nil) if args.empty?

        args.each { |name| add_or_get('measure', name) }
      end

      # With both a key and a value, records the pair and returns the value.
      # If either is nil, returns the whole options hash (nil if unset).
      def option(opt=nil,value=nil)
        return @dataset_generator_options if opt.nil? || value.nil?

        @dataset_generator_options ||= {}
        @dataset_generator_options[opt] = value
      end
      alias_method :options, :option

      # The configuration object exposed by Dataset.
      def settings
        Dataset.configuration
      end

      # Gathers dimensions, measures and explicit options (explicit options
      # win on key clashes) and passes them to Dataset.for together with the
      # source object and the interactive setting.
      #
      # NOTE(review): values are passed under :dimension / :measure
      # (singular); confirm the readers do not expect plural keys.
      def generate_n3
        opts = {}
        [:dimension, :measure].each do |field|
          collected = send(field)
          opts[field] = collected if collected
        end

        interact = settings.interactive
        extra = options
        opts = opts.merge(extra) if extra

        Dataset.for(object,opts,interact)
      end

      private

      # Scalar slot helper: assigns @var when input is truthy, otherwise
      # returns the current value.
      def set_or_get(var,input=nil)
        slot = "@#{var}"
        current = instance_variable_get(slot)

        input ? instance_variable_set(slot, input) : current
      end

      # List slot helper: appends a truthy input to @var (creating the array
      # on first use) and returns the list; otherwise returns the current value.
      def add_or_get(var,input)
        slot = "@#{var}"
        current = instance_variable_get(slot)
        return current unless input

        current = instance_variable_set(slot, []) unless current
        current << input
        current
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module PubliSci
  # Top-level scripting vocabulary: collects dataset, metadata and
  # provenance descriptions from blocks and serialises or stores them.
  module DSL
    attr_reader :base_url

    # Use to set base url for whole script; helps when referring to dataset
    # resources from metadata and provenance. Also forwarded to Prov.
    def base_url=(url)
      @base_url = url
      Prov.base_url=url
    end

    # Evaluates the block against a new dataset DSL instance and remembers
    # the instance for generate_n3.
    #
    # @return [PubliSci::Dataset::DSL::Instance] the configured instance
    def data(&block)
      inst = PubliSci::Dataset::DSL::Instance.new
      inst.instance_eval(&block)
      # Parenthesised on purpose: `@x ||= [] << inst` parses as
      # `@x ||= ([] << inst)` and would keep only the first instance.
      (@_dsl_data ||= []) << inst
      inst
    end

    # Evaluates the block against a new metadata DSL instance; a later call
    # replaces the earlier instance.
    def metadata(&block)
      inst = PubliSci::Metadata::DSL::Instance.new
      inst.instance_eval(&block)
      @_dsl_metadata = inst
      inst
    end

    # Evaluates the block against a new provenance DSL instance; a later
    # call replaces the earlier instance.
    def provenance(&block)
      inst = PubliSci::Prov::DSL::Instance.new
      inst.instance_eval(&block)
      @_dsl_prov = inst
      inst
    end

    # @return [DSL::Configuration] the lazily created settings object
    def configuration
      @_dsl_config ||= DSL::Configuration.new
    end

    # Yields the settings object so callers can assign options in a block.
    def configure
      yield configuration
    end

    # Alias-style accessor for the settings object.
    def settings
      configuration
    end

    # Concatenates the output of every recorded data block, then metadata,
    # then provenance.
    #
    # @return [String] the combined output (empty when nothing was recorded)
    def generate_n3
      out = ""
      @_dsl_data.each{|dat| out << dat.generate_n3 } if @_dsl_data
      out << @_dsl_metadata.generate_n3 if @_dsl_metadata
      out << @_dsl_prov.generate_n3 if @_dsl_prov
      out
    end

    # Loads a turtle string into the repository selected by
    # settings.repository (:in_memory, :fourstore, or any other value,
    # which is used as the repository object itself).
    #
    # @param turtle_string [String] turtle to load; defaults to generate_n3
    # @return the repository the data was loaded into
    def to_repository(turtle_string=generate_n3)
      repo = settings.repository
      case repo
      when :in_memory
        repo = RDF::Repository.new
      when :fourstore
        # Honour the configured endpoint; the trailing slash is dropped so
        # the default resolves to the previously hard-coded
        # 'http://localhost:8080'.
        repo = RDF::FourStore::Repository.new(settings.repository_url.to_s.chomp('/'))
      end

      f = Tempfile.new(['repo','.ttl'])
      begin
        f.write(turtle_string)
        f.close
        repo.load(f.path, :format => :ttl)
      ensure
        # Remove the temp file even when loading raises.
        f.close!
      end
      repo
    end
  end
end
|