bio-publisci 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Rakefile +1 -1
- data/examples/prov_dsl.prov +2 -1
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/metadata_steps.rb +2 -4
- data/features/orm_steps.rb +4 -4
- data/features/reader_steps.rb +1 -1
- data/features/store_steps.rb +1 -1
- data/features/writer.feature +1 -1
- data/features/writer_steps.rb +1 -1
- data/lib/bio-publisci.rb +10 -2
- data/lib/bio-publisci/analyzer.rb +4 -4
- data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +28 -17
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
- data/lib/bio-publisci/dataset/interactive.rb +1 -1
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
- data/lib/bio-publisci/dsl/dsl.rb +69 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +3 -314
- data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
- data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
- data/lib/bio-publisci/metadata/prov/association.rb +2 -2
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
- data/lib/bio-publisci/metadata/prov/element.rb +2 -2
- data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
- data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
- data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
- data/lib/bio-publisci/metadata/prov/role.rb +1 -1
- data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/output.rb +1 -1
- data/lib/bio-publisci/parser.rb +1 -1
- data/lib/bio-publisci/query/query_helper.rb +14 -14
- data/lib/bio-publisci/r_client.rb +5 -5
- data/lib/bio-publisci/readers/arff.rb +5 -5
- data/lib/bio-publisci/readers/csv.rb +3 -3
- data/lib/bio-publisci/readers/dataframe.rb +3 -3
- data/lib/bio-publisci/readers/r_cross.rb +4 -4
- data/lib/bio-publisci/readers/r_matrix.rb +3 -3
- data/lib/bio-publisci/store.rb +3 -3
- data/lib/bio-publisci/writers/arff.rb +6 -6
- data/lib/bio-publisci/writers/dataframe.rb +5 -5
- data/scripts/islet_mlratio.rb +1 -1
- data/scripts/scan_islet.rb +1 -1
- data/scripts/update_reference.rb +2 -2
- data/spec/ORM/data_cube_orm_spec.rb +2 -2
- data/spec/ORM/prov_model_spec.rb +19 -0
- data/spec/analyzer_spec.rb +7 -7
- data/spec/data_cube_spec.rb +13 -13
- data/spec/dataset_for_spec.rb +11 -4
- data/spec/dsl_spec.rb +90 -0
- data/spec/generators/csv_spec.rb +4 -4
- data/spec/generators/dataframe_spec.rb +6 -6
- data/spec/generators/r_cross_spec.rb +2 -2
- data/spec/generators/r_matrix_spec.rb +2 -2
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +4 -4
- data/spec/prov/agent_spec.rb +3 -4
- data/spec/prov/association_spec.rb +1 -2
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +3 -4
- data/spec/prov/role_spec.rb +1 -2
- data/spec/prov/usage_spec.rb +1 -2
- data/spec/r_builder_spec.rb +3 -3
- data/spec/turtle/bacon +20 -4
- data/spec/turtle/reference +20 -4
- metadata +37 -4
@@ -1,17 +1,17 @@
|
|
1
|
-
module
|
1
|
+
module PubliSci
|
2
2
|
module ORM
|
3
3
|
class DataCube
|
4
|
-
extend
|
5
|
-
extend
|
6
|
-
extend
|
7
|
-
extend
|
8
|
-
extend
|
9
|
-
|
10
|
-
include
|
11
|
-
include
|
12
|
-
include
|
13
|
-
include
|
14
|
-
include
|
4
|
+
extend PubliSci::Dataset::DataCube
|
5
|
+
extend PubliSci::Analyzer
|
6
|
+
# extend PubliSci::Metadata
|
7
|
+
extend PubliSci::Query
|
8
|
+
extend PubliSci::Parser
|
9
|
+
|
10
|
+
include PubliSci::Dataset::DataCube
|
11
|
+
include PubliSci::Analyzer
|
12
|
+
include PubliSci::Metadata::Generator
|
13
|
+
include PubliSci::Query
|
14
|
+
include PubliSci::Parser
|
15
15
|
|
16
16
|
attr_accessor :labels
|
17
17
|
attr_accessor :dimensions
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module PubliSci
  class Dataset
    # Settings object for dataset generation.
    #
    # Every key returned by Configuration.defaults gets two accessors:
    #
    #   config.interactive          # read (falls back to the default)
    #   config.interactive(true)    # write through the reader, returns the new value
    #   config.interactive = true   # plain attr_writer
    class Configuration
      # Returns a Hash mapping each supported setting to its default value.
      def self.defaults
        {
          interactive: false,
        }
      end

      defaults.each do |key, default|
        ivar = :"@#{key}"

        # Combined reader/writer for the setting.
        #
        # input - optional new value. Only nil means "no value given", so an
        #         explicit false is stored (the previous `if input` check
        #         silently ignored false, making it impossible to switch a
        #         setting off through this method).
        #
        # Returns the current value, initialising it to the default when unset.
        define_method(key) do |input=nil|
          if !input.nil?
            instance_variable_set(ivar, input)
          elsif instance_variable_get(ivar).nil?
            # Only nil counts as unset; a stored false is a real value.
            instance_variable_set(ivar, default)
          end

          instance_variable_get(ivar)
        end

        attr_writer key
      end
    end
  end
end
|
@@ -5,10 +5,10 @@ class String
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
module
|
8
|
+
module PubliSci
|
9
9
|
class Dataset
|
10
10
|
module DataCube
|
11
|
-
include
|
11
|
+
include PubliSci::Parser
|
12
12
|
def defaults
|
13
13
|
{
|
14
14
|
type: :dataframe,
|
@@ -60,6 +60,11 @@ module R2RDF
|
|
60
60
|
[newm, newd, newc]
|
61
61
|
end
|
62
62
|
|
63
|
+
# Rewrites property references into component-specification ("cs:")
# references.
#
# args    - a String or an Array (nesting is flattened) of Strings.
# options - Hash; options[:base_url] is the URL prefix of fully qualified
#           <...> references that should be shortened.
#
# Two substitutions are applied to every string:
#   * each "prop:" becomes "cs:"
#   * a trailing "<base_url/.../name>" becomes "cs:name"
#
# The base URL is escaped before being embedded in the pattern so that
# characters such as ".", "+" or "(" in the URL are matched literally
# instead of being interpreted as regex metacharacters.
#
# Returns an Array of the rewritten Strings.
def component_gen(args,options={})
  base = Regexp.escape(options[:base_url].to_s)
  full_uri = %r{<#{base}/.+/(\w.+)>$}

  Array[args].flatten.map do |arg|
    arg.gsub("prop:", "cs:").gsub(full_uri, 'cs:\1')
  end
end
|
67
|
+
|
63
68
|
def encode_data(codes,data,var,options={})
|
64
69
|
codes = sanitize(codes)
|
65
70
|
new_data = {}
|
@@ -101,7 +106,7 @@ module R2RDF
|
|
101
106
|
str = prefixes(var,options)
|
102
107
|
str << data_structure_definition(measures, dimensions, codes, var, options)
|
103
108
|
str << dataset(var, options)
|
104
|
-
|
109
|
+
component_specifications(measures, dimensions, codes, var, options).map{ |c| str << c }
|
105
110
|
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
106
111
|
measure_properties(measures, var, options).map{|p| str << p}
|
107
112
|
code_lists(codes, data, var, options).map{|l| str << l}
|
@@ -138,14 +143,15 @@ module R2RDF
|
|
138
143
|
var = sanitize([var]).first
|
139
144
|
options = defaults().merge(options)
|
140
145
|
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
141
|
-
|
146
|
+
cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
|
147
|
+
cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
|
142
148
|
str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
|
143
|
-
|
144
|
-
str << " qb:component
|
149
|
+
cs_dims.map{|d|
|
150
|
+
str << " qb:component #{d} ;\n"
|
145
151
|
}
|
146
152
|
|
147
|
-
|
148
|
-
str << " qb:component
|
153
|
+
cs_meas.map{|m|
|
154
|
+
str << " qb:component #{m} ;\n"
|
149
155
|
}
|
150
156
|
str[-2]='.'
|
151
157
|
str<<"\n"
|
@@ -163,24 +169,29 @@ module R2RDF
|
|
163
169
|
EOF
|
164
170
|
end
|
165
171
|
|
166
|
-
def component_specifications(measure_names, dimension_names, var, options={})
|
172
|
+
def component_specifications(measure_names, dimension_names, codes, var, options={})
|
167
173
|
options = defaults().merge(options)
|
174
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
|
175
|
+
cs_dims = component_gen(rdf_dimensions,options)
|
176
|
+
cs_meas = component_gen(rdf_measures,options)
|
177
|
+
# cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
|
178
|
+
# cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')}
|
168
179
|
specs = []
|
169
180
|
|
170
|
-
|
181
|
+
rdf_dimensions.each_with_index.map{|d,i|
|
171
182
|
specs << <<-EOF.unindent
|
172
|
-
|
173
|
-
rdfs:label "#{
|
174
|
-
qb:dimension
|
183
|
+
#{cs_dims[i]} a qb:ComponentSpecification ;
|
184
|
+
rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ;
|
185
|
+
qb:dimension #{d} .
|
175
186
|
|
176
187
|
EOF
|
177
188
|
}
|
178
189
|
|
179
|
-
|
190
|
+
rdf_measures.each_with_index.map{|n,i|
|
180
191
|
specs << <<-EOF.unindent
|
181
|
-
|
182
|
-
rdfs:label "#{
|
183
|
-
qb:measure
|
192
|
+
#{cs_meas[i]} a qb:ComponentSpecification ;
|
193
|
+
rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ;
|
194
|
+
qb:measure #{n} .
|
184
195
|
|
185
196
|
EOF
|
186
197
|
}
|
@@ -1,6 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'open-uri'
|
2
|
+
module PubliSci
|
2
3
|
class Dataset
|
3
|
-
extend
|
4
|
+
extend PubliSci::Interactive
|
4
5
|
|
5
6
|
def self.for(object, options={}, ask_on_ambiguous=true)
|
6
7
|
|
@@ -23,10 +24,12 @@ module R2RDF
|
|
23
24
|
when ".RData"
|
24
25
|
r_object(object, options, ask_on_ambiguous)
|
25
26
|
when /.csv/i
|
26
|
-
|
27
|
+
PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
|
27
28
|
end
|
29
|
+
elsif object =~ %r{http[s]://.+}
|
30
|
+
self.for(download(object).path, options, ask_on_ambiguous)
|
28
31
|
else
|
29
|
-
raise "Unable to find reader for File or String"
|
32
|
+
raise "Unable to find reader for File or String #{object}"
|
30
33
|
end
|
31
34
|
elsif object.is_a? Rserve::REXP
|
32
35
|
r_object(object, options, ask_on_ambiguous)
|
@@ -35,6 +38,13 @@ module R2RDF
|
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
41
|
+
# Downloads the resource at +uri+ into a temporary file.
#
# uri - String URL of the resource.
#
# The temp file keeps the extension of the last path segment at the END of
# its name (Tempfile.new with a plain String puts the random suffix after
# the extension), so code that inspects the returned path's extension sees
# the same extension as the remote file.
#
# The URL is opened through URI#open (provided by open-uri) rather than
# Kernel#open: Kernel#open no longer handles URLs as of Ruby 3.0 and would
# run a command if handed a string beginning with "|".
#
# Returns the closed Tempfile; use #path to read it back.
def self.download(uri)
  name = uri.split('/').last
  ext  = File.extname(name)
  out  = Tempfile.new([File.basename(name, ext), ext])

  remote = URI.parse(uri).open
  begin
    out.write remote.read
  ensure
    # Release the response stream even when the write fails.
    remote.close
  end

  out.close
  out
end
|
47
|
+
|
38
48
|
def self.r_object(object, options={}, ask_on_ambiguous=true)
|
39
49
|
if object.is_a? String
|
40
50
|
con = Rserve::Connection.new
|
@@ -49,7 +59,7 @@ module R2RDF
|
|
49
59
|
r_classes = con.eval("class(#{var})").to_ruby
|
50
60
|
|
51
61
|
if r_classes.include? "data.frame"
|
52
|
-
df =
|
62
|
+
df = PubliSci::Reader::Dataframe.new
|
53
63
|
unless options[:dimensions] || !ask_on_ambiguous
|
54
64
|
dims = con.eval("names(#{var})").to_ruby
|
55
65
|
puts "Which dimensions? #{dims}"
|
@@ -70,7 +80,7 @@ module R2RDF
|
|
70
80
|
df.generate_n3(con.eval(var),var,options)
|
71
81
|
|
72
82
|
elsif r_classes.include? "cross"
|
73
|
-
bc =
|
83
|
+
bc = PubliSci::Reader::RCross.new
|
74
84
|
|
75
85
|
unless options[:measures] || !ask_on_ambiguous
|
76
86
|
pheno_names = con.eval("names(#{var}$pheno)").to_ruby
|
@@ -91,7 +101,7 @@ module R2RDF
|
|
91
101
|
bc.generate_n3(con, var, base, options)
|
92
102
|
|
93
103
|
elsif r_classes.include? "matrix"
|
94
|
-
mat =
|
104
|
+
mat = PubliSci::Reader::RMatrix.new
|
95
105
|
|
96
106
|
unless options[:measures] || !ask_on_ambiguous
|
97
107
|
puts "Row label"
|
@@ -118,13 +128,13 @@ module R2RDF
|
|
118
128
|
|
119
129
|
mat.generate_n3(con, var, base, options)
|
120
130
|
else
|
121
|
-
raise "no
|
131
|
+
raise "no PubliSci::Reader found for #{r_classes}"
|
122
132
|
end
|
123
133
|
|
124
134
|
elsif object.is_a? Rserve::REXP
|
125
135
|
if object.attr.payload["class"].payload.first
|
126
136
|
|
127
|
-
df =
|
137
|
+
df = PubliSci::Reader::Dataframe.new
|
128
138
|
|
129
139
|
var = nil
|
130
140
|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module PubliSci
  module DSL
    # Settings object for the top-level DSL.
    #
    # Every key returned by Configuration.defaults gets two accessors:
    #
    #   config.repository               # read (falls back to the default)
    #   config.repository(:fourstore)   # write through the reader
    #   config.repository = :fourstore  # plain attr_writer
    class Configuration
      # Returns a Hash mapping each supported setting to its default value.
      def self.defaults
        {
          output: :generate_n3,
          abbreviate: false,
          repository: :in_memory,
          repository_url: 'http://localhost:8080/'
        }
      end

      defaults.each do |key, default|
        ivar = :"@#{key}"

        # Combined reader/writer for the setting.
        #
        # input - optional new value. Only nil means "no value given", so an
        #         explicit false is stored (the previous `if input` check
        #         silently ignored false, so e.g. abbreviate(false) could not
        #         undo abbreviate(true)).
        #
        # Returns the current value, initialising it to the default when unset.
        define_method(key) do |input=nil|
          if !input.nil?
            instance_variable_set(ivar, input)
          elsif instance_variable_get(ivar).nil?
            # Only nil counts as unset; a stored false is a real value.
            instance_variable_set(ivar, default)
          end

          instance_variable_get(ivar)
        end

        attr_writer key
      end
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module PubliSci
  class Dataset
    # DSL methods for describing a dataset (its source object, dimensions,
    # measures and generator options) and turning it into output via
    # Dataset.for.
    module DSL

      # Receiver that carries the DSL methods. Creating one clears
      # Dataset.registry.
      class Instance
        include Dataset::DSL

        def initialize
          Dataset.registry.clear
        end
      end

      # def interactive(value=nil)
      #   set_or_get('interactive',value)
      # end

      # Sets the source object when +file+ is given, otherwise returns the
      # stored one. Also available as #source.
      def object(file=nil)
        set_or_get('object',file)
      end
      alias_method :source, :object

      # With arguments: appends each one to the list of dimensions.
      # Without arguments: returns the list collected so far (nil if none).
      def dimension(*args)
        if args.empty?
          add_or_get('dimension',nil)
        else
          args.each { |arg| add_or_get('dimension',arg) }
        end
      end

      # With arguments: appends each one to the list of measures.
      # Without arguments: returns the list collected so far (nil if none).
      def measure(*args)
        if args.empty?
          add_or_get('measure',nil)
        else
          args.each { |arg| add_or_get('measure',arg) }
        end
      end

      # Stores a generator option when both +opt+ and +value+ are given;
      # otherwise returns the Hash of options stored so far (nil if none).
      # Also available as #options.
      def option(opt=nil,value=nil)
        if opt.nil? || value.nil?
          @dataset_generator_options
        else
          (@dataset_generator_options ||= {})[opt] = value
        end
      end
      alias_method :options, :option

      # Returns Dataset.configuration.
      def settings
        Dataset.configuration
      end

      # Builds the options Hash from the declared dimensions, measures and
      # explicit options, then delegates to Dataset.for.
      #
      # Declared dimensions/measures are passed under the plural keys
      # (:dimensions, :measures), which are the keys the reader code checks
      # (e.g. options[:dimensions] / options[:measures] in Dataset.r_object).
      # The singular keys that were passed previously are kept as well so any
      # consumer relying on them keeps working. Explicit options set through
      # #option take precedence over both.
      def generate_n3
        opts = {}
        %w{dimension measure}.each do |field|
          values = send(field.to_sym)
          next unless values

          opts[field.to_sym] = values
          opts[:"#{field}s"] = values
        end

        interact = settings.interactive
        opts = opts.merge(options) if options

        Dataset.for(object,opts,interact)
      end

      private

      # Stores +input+ in @<var> when given (returning it), otherwise returns
      # the current value of @<var>.
      def set_or_get(var,input=nil)
        ivar = instance_variable_get("@#{var}")

        if input
          instance_variable_set("@#{var}", input)
        else
          ivar
        end
      end

      # Appends +input+ to the Array held in @<var> (creating it on first
      # use) and returns that Array; with a nil +input+ returns the current
      # value of @<var> unchanged.
      def add_or_get(var,input)
        ivar = instance_variable_get("@#{var}")
        return ivar unless input

        instance_variable_set("@#{var}", []) unless ivar
        instance_variable_get("@#{var}") << input
      end
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module PubliSci
  # Top-level DSL: collects data, metadata and provenance blocks and renders
  # or stores their combined output.
  module DSL
    attr_reader :base_url

    # Use to set base url for whole script; helps when referring to dataset
    # resources from metadata and provenance.
    # NOTE(review): the original comment was cut off after "metadata and";
    # "provenance" is presumed because the value is forwarded to Prov.base_url.
    def base_url=(url)
      @base_url = url
      Prov.base_url=url
    end

    # Evaluates +block+ against a new Dataset DSL instance and records the
    # instance so #generate_n3 includes it.
    #
    # Every call appends to the list. (Previously `@_dsl_data ||= [] << inst`
    # parsed as `@_dsl_data ||= ([] << inst)`, so only the first data block
    # was ever recorded.)
    #
    # Returns the instance.
    def data(&block)
      inst = PubliSci::Dataset::DSL::Instance.new
      inst.instance_eval(&block)
      (@_dsl_data ||= []) << inst
      inst
    end

    # Evaluates +block+ against a new Metadata DSL instance and remembers it
    # (replacing any earlier one). Returns the instance.
    def metadata(&block)
      inst = PubliSci::Metadata::DSL::Instance.new
      inst.instance_eval(&block)
      @_dsl_metadata = inst
      inst
    end

    # Evaluates +block+ against a new Prov DSL instance and remembers it
    # (replacing any earlier one). Returns the instance.
    def provenance(&block)
      inst = PubliSci::Prov::DSL::Instance.new
      inst.instance_eval(&block)
      @_dsl_prov = inst
      inst
    end

    # Returns the memoized DSL::Configuration for this receiver.
    def configuration
      @_dsl_config ||= DSL::Configuration.new
    end

    # Yields the configuration object for block-style setup.
    def configure
      yield configuration
    end

    # Alias-style accessor for #configuration.
    def settings
      configuration
    end

    # Concatenates the generate_n3 output of every recorded data block,
    # then the metadata block, then the provenance block (each only if
    # present). Returns the combined String.
    def generate_n3
      out = ""
      @_dsl_data.each { |dat| out << dat.generate_n3 } if @_dsl_data
      out << @_dsl_metadata.generate_n3 if @_dsl_metadata
      out << @_dsl_prov.generate_n3 if @_dsl_prov
      out
    end

    # Writes +turtle_string+ to a temporary .ttl file and loads it into the
    # repository selected by settings.repository.
    #
    # :in_memory and :fourstore are mapped to new repository objects; any
    # other value is used as-is and must respond to #load.
    #
    # Returns the repository.
    def to_repository(turtle_string=generate_n3)
      repo = settings.repository
      case repo
      when :in_memory
        repo = RDF::Repository.new
      when :fourstore
        # NOTE(review): the URL is hard-coded; settings.repository_url is not
        # consulted here - confirm whether that is intended.
        repo = RDF::FourStore::Repository.new('http://localhost:8080')
      end

      f = Tempfile.new(['repo','.ttl'])
      begin
        f.write(turtle_string)
        f.close
        repo.load(f.path, :format => :ttl)
      ensure
        # Remove the temp file even when writing or loading raises.
        f.close unless f.closed?
        f.unlink
      end
      repo
    end
  end
end
|