bio-publisci 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,17 +1,17 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module ORM
3
3
  class DataCube
4
- extend R2RDF::Dataset::DataCube
5
- extend R2RDF::Analyzer
6
- extend R2RDF::Metadata
7
- extend R2RDF::Query
8
- extend R2RDF::Parser
9
-
10
- include R2RDF::Dataset::DataCube
11
- include R2RDF::Analyzer
12
- include R2RDF::Metadata
13
- include R2RDF::Query
14
- include R2RDF::Parser
4
+ extend PubliSci::Dataset::DataCube
5
+ extend PubliSci::Analyzer
6
+ # extend PubliSci::Metadata
7
+ extend PubliSci::Query
8
+ extend PubliSci::Parser
9
+
10
+ include PubliSci::Dataset::DataCube
11
+ include PubliSci::Analyzer
12
+ include PubliSci::Metadata::Generator
13
+ include PubliSci::Query
14
+ include PubliSci::Parser
15
15
 
16
16
  attr_accessor :labels
17
17
  attr_accessor :dimensions
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module ORM
3
3
  class Observation
4
4
  attr_accessor :data
@@ -0,0 +1,31 @@
1
+ module PubliSci
2
+ class Dataset
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ interactive: false,
7
+ }
8
+ end
9
+
10
+ defaults.keys.each{|k|
11
+ default = defaults[k]
12
+ define_method(k) do |input=nil|
13
+ var = instance_variable_get :"@#{k}"
14
+ if var
15
+ var
16
+ else
17
+ instance_variable_set :"@#{k}", default
18
+ end
19
+
20
+ if input
21
+ instance_variable_set :"@#{k}", input
22
+ end
23
+
24
+ instance_variable_get :"@#{k}"
25
+ end
26
+
27
+ attr_writer k
28
+ }
29
+ end
30
+ end
31
+ end
@@ -5,10 +5,10 @@ class String
5
5
  end
6
6
  end
7
7
 
8
- module R2RDF
8
+ module PubliSci
9
9
  class Dataset
10
10
  module DataCube
11
- include R2RDF::Parser
11
+ include PubliSci::Parser
12
12
  def defaults
13
13
  {
14
14
  type: :dataframe,
@@ -60,6 +60,11 @@ module R2RDF
60
60
  [newm, newd, newc]
61
61
  end
62
62
 
63
+ def component_gen(args,options={})
64
+ args = Array[args].flatten
65
+ args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')}
66
+ end
67
+
63
68
  def encode_data(codes,data,var,options={})
64
69
  codes = sanitize(codes)
65
70
  new_data = {}
@@ -101,7 +106,7 @@ module R2RDF
101
106
  str = prefixes(var,options)
102
107
  str << data_structure_definition(measures, dimensions, codes, var, options)
103
108
  str << dataset(var, options)
104
- # component_specifications(measures, dimensions, var, options).map{ |c| str << c }
109
+ component_specifications(measures, dimensions, codes, var, options).map{ |c| str << c }
105
110
  dimension_properties(dimensions, codes, var, options).map{|p| str << p}
106
111
  measure_properties(measures, var, options).map{|p| str << p}
107
112
  code_lists(codes, data, var, options).map{|l| str << l}
@@ -138,14 +143,15 @@ module R2RDF
138
143
  var = sanitize([var]).first
139
144
  options = defaults().merge(options)
140
145
  rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
141
-
146
+ cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
147
+ cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
142
148
  str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
143
- rdf_dimensions.map{|d|
144
- str << " qb:component [ qb:dimension #{d} ] ;\n"
149
+ cs_dims.map{|d|
150
+ str << " qb:component #{d} ;\n"
145
151
  }
146
152
 
147
- rdf_measures.map{|m|
148
- str << " qb:component [ qb:measure #{m} ] ;\n"
153
+ cs_meas.map{|m|
154
+ str << " qb:component #{m} ;\n"
149
155
  }
150
156
  str[-2]='.'
151
157
  str<<"\n"
@@ -163,24 +169,29 @@ module R2RDF
163
169
  EOF
164
170
  end
165
171
 
166
- def component_specifications(measure_names, dimension_names, var, options={})
172
+ def component_specifications(measure_names, dimension_names, codes, var, options={})
167
173
  options = defaults().merge(options)
174
+ rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
175
+ cs_dims = component_gen(rdf_dimensions,options)
176
+ cs_meas = component_gen(rdf_measures,options)
177
+ # cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
178
+ # cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')}
168
179
  specs = []
169
180
 
170
- dimension_names.map{|d|
181
+ rdf_dimensions.each_with_index.map{|d,i|
171
182
  specs << <<-EOF.unindent
172
- cs:#{d} a qb:ComponentSpecification ;
173
- rdfs:label "#{d} Component" ;
174
- qb:dimension prop:#{d} .
183
+ #{cs_dims[i]} a qb:ComponentSpecification ;
184
+ rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ;
185
+ qb:dimension #{d} .
175
186
 
176
187
  EOF
177
188
  }
178
189
 
179
- measure_names.map{|n|
190
+ rdf_measures.each_with_index.map{|n,i|
180
191
  specs << <<-EOF.unindent
181
- cs:#{n} a qb:ComponentSpecification ;
182
- rdfs:label "#{n} Component" ;
183
- qb:measure prop:#{n} .
192
+ #{cs_meas[i]} a qb:ComponentSpecification ;
193
+ rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ;
194
+ qb:measure #{n} .
184
195
 
185
196
  EOF
186
197
  }
@@ -0,0 +1,11 @@
1
+ module PubliSci
2
+ class Dataset
3
+ extend PubliSci::Interactive
4
+ extend PubliSci::Registry
5
+
6
+ def self.configuration
7
+ @config ||= Dataset::Configuration.new
8
+ end
9
+
10
+ end
11
+ end
@@ -1,6 +1,7 @@
1
- module R2RDF
1
+ require 'open-uri'
2
+ module PubliSci
2
3
  class Dataset
3
- extend R2RDF::Interactive
4
+ extend PubliSci::Interactive
4
5
 
5
6
  def self.for(object, options={}, ask_on_ambiguous=true)
6
7
 
@@ -23,10 +24,12 @@ module R2RDF
23
24
  when ".RData"
24
25
  r_object(object, options, ask_on_ambiguous)
25
26
  when /.csv/i
26
- R2RDF::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
27
+ PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
27
28
  end
29
+ elsif object =~ %r{http[s]://.+}
30
+ self.for(download(object).path, options, ask_on_ambiguous)
28
31
  else
29
- raise "Unable to find reader for File or String"
32
+ raise "Unable to find reader for File or String #{object}"
30
33
  end
31
34
  elsif object.is_a? Rserve::REXP
32
35
  r_object(object, options, ask_on_ambiguous)
@@ -35,6 +38,13 @@ module R2RDF
35
38
  end
36
39
  end
37
40
 
41
+ def self.download(uri)
42
+ out = Tempfile.new(uri.split('/').last)
43
+ out.write open(uri).read
44
+ out.close
45
+ out
46
+ end
47
+
38
48
  def self.r_object(object, options={}, ask_on_ambiguous=true)
39
49
  if object.is_a? String
40
50
  con = Rserve::Connection.new
@@ -49,7 +59,7 @@ module R2RDF
49
59
  r_classes = con.eval("class(#{var})").to_ruby
50
60
 
51
61
  if r_classes.include? "data.frame"
52
- df = R2RDF::Reader::Dataframe.new
62
+ df = PubliSci::Reader::Dataframe.new
53
63
  unless options[:dimensions] || !ask_on_ambiguous
54
64
  dims = con.eval("names(#{var})").to_ruby
55
65
  puts "Which dimensions? #{dims}"
@@ -70,7 +80,7 @@ module R2RDF
70
80
  df.generate_n3(con.eval(var),var,options)
71
81
 
72
82
  elsif r_classes.include? "cross"
73
- bc = R2RDF::Reader::RCross.new
83
+ bc = PubliSci::Reader::RCross.new
74
84
 
75
85
  unless options[:measures] || !ask_on_ambiguous
76
86
  pheno_names = con.eval("names(#{var}$pheno)").to_ruby
@@ -91,7 +101,7 @@ module R2RDF
91
101
  bc.generate_n3(con, var, base, options)
92
102
 
93
103
  elsif r_classes.include? "matrix"
94
- mat = R2RDF::Reader::RMatrix.new
104
+ mat = PubliSci::Reader::RMatrix.new
95
105
 
96
106
  unless options[:measures] || !ask_on_ambiguous
97
107
  puts "Row label"
@@ -118,13 +128,13 @@ module R2RDF
118
128
 
119
129
  mat.generate_n3(con, var, base, options)
120
130
  else
121
- raise "no R2RDF::Reader found for #{r_classes}"
131
+ raise "no PubliSci::Reader found for #{r_classes}"
122
132
  end
123
133
 
124
134
  elsif object.is_a? Rserve::REXP
125
135
  if object.attr.payload["class"].payload.first
126
136
 
127
- df = R2RDF::Reader::Dataframe.new
137
+ df = PubliSci::Reader::Dataframe.new
128
138
 
129
139
  var = nil
130
140
 
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module Interactive
3
3
  #to be called by other classes if user input is required
4
4
 
@@ -0,0 +1,34 @@
1
+ module PubliSci
2
+ module DSL
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ output: :generate_n3,
7
+ abbreviate: false,
8
+ repository: :in_memory,
9
+ repository_url: 'http://localhost:8080/'
10
+ }
11
+ end
12
+
13
+ defaults.keys.each{|k|
14
+ default = defaults[k]
15
+ define_method(k) do |input=nil|
16
+ var = instance_variable_get :"@#{k}"
17
+ if var
18
+ var
19
+ else
20
+ instance_variable_set :"@#{k}", default
21
+ end
22
+
23
+ if input
24
+ instance_variable_set :"@#{k}", input
25
+ end
26
+
27
+ instance_variable_get :"@#{k}"
28
+ end
29
+
30
+ attr_writer k
31
+ }
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,91 @@
1
+ module PubliSci
2
+ class Dataset
3
+ module DSL
4
+
5
+ class Instance
6
+ include Dataset::DSL
7
+
8
+ def initialize
9
+ Dataset.registry.clear
10
+ end
11
+ end
12
+
13
+ # def interactive(value=nil)
14
+ # set_or_get('interactive',value)
15
+ # end
16
+
17
+ def object(file=nil)
18
+ set_or_get('object',file)
19
+ end
20
+ alias_method :source, :object
21
+
22
+ def dimension(*args)
23
+ if args.size == 0
24
+ add_or_get('dimension',nil)
25
+ else
26
+ args.each{|arg|
27
+ add_or_get('dimension',arg)
28
+ }
29
+ end
30
+ end
31
+
32
+ def measure(*args)
33
+ if args.size == 0
34
+ add_or_get('measure',nil)
35
+ else
36
+ args.each{|arg|
37
+ add_or_get('measure',arg)
38
+ }
39
+ end
40
+ end
41
+
42
+ def option(opt=nil,value=nil)
43
+ if opt == nil || value == nil
44
+ @dataset_generator_options
45
+ else
46
+ (@dataset_generator_options ||= {})[opt] = value
47
+ end
48
+ end
49
+ alias_method :options, :option
50
+
51
+ def settings
52
+ Dataset.configuration
53
+ end
54
+
55
+ def generate_n3
56
+ opts = {}
57
+ %w{dimension measure}.each{|field|
58
+ opts[field.to_sym] = send(field.to_sym) if send(field.to_sym)
59
+ }
60
+ interact = settings.interactive
61
+ if options
62
+ opts = opts.merge(options)
63
+ end
64
+ Dataset.for(object,opts,interact)
65
+ end
66
+
67
+ private
68
+ def set_or_get(var,input=nil)
69
+ ivar = instance_variable_get("@#{var}")
70
+
71
+ if input
72
+ instance_variable_set("@#{var}", input)
73
+ else
74
+ ivar
75
+ end
76
+ end
77
+
78
+ def add_or_get(var,input)
79
+ ivar = instance_variable_get("@#{var}")
80
+
81
+ if input
82
+ instance_variable_set("@#{var}", []) unless ivar
83
+ instance_variable_get("@#{var}") << input
84
+ instance_variable_get("@#{var}")
85
+ else
86
+ ivar
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,69 @@
1
+ module PubliSci
2
+ module DSL
3
+ attr_reader :base_url
4
+
5
+ # Use to set base url for whole script; helps when referring to dataset
6
+ # resources from metadata and provenance blocks.
7
+ def base_url=(url)
8
+ @base_url = url
9
+ Prov.base_url=url
10
+ end
11
+
12
+ def data(&block)
13
+ inst=PubliSci::Dataset::DSL::Instance.new
14
+ inst.instance_eval(&block)
15
+ @_dsl_data ||= [] << inst
16
+ inst
17
+ end
18
+
19
+ def metadata(&block)
20
+ inst=PubliSci::Metadata::DSL::Instance.new
21
+ inst.instance_eval(&block)
22
+ @_dsl_metadata = inst
23
+ inst
24
+ end
25
+
26
+ def provenance(&block)
27
+ inst=PubliSci::Prov::DSL::Instance.new
28
+ inst.instance_eval(&block)
29
+ @_dsl_prov = inst
30
+ inst
31
+ end
32
+
33
+ def configuration
34
+ @_dsl_config ||= DSL::Configuration.new
35
+ end
36
+
37
+ def configure
38
+ yield configuration
39
+ end
40
+
41
+ def settings
42
+ configuration
43
+ end
44
+
45
+ def generate_n3
46
+ out = ""
47
+ @_dsl_data.each{|dat| out << dat.generate_n3 } if @_dsl_data
48
+ out << @_dsl_metadata.generate_n3 if @_dsl_metadata
49
+ out << @_dsl_prov.generate_n3 if @_dsl_prov
50
+ out
51
+ end
52
+
53
+ def to_repository(turtle_string=generate_n3)
54
+ repo = settings.repository
55
+ case repo
56
+ when :in_memory
57
+ repo = RDF::Repository.new
58
+ when :fourstore
59
+ repo = RDF::FourStore::Repository.new('http://localhost:8080')
60
+ end
61
+ f=Tempfile.new(['repo','.ttl'])
62
+ f.write(turtle_string)
63
+ f.close
64
+ repo.load(f.path, :format => :ttl).to_s
65
+ f.unlink
66
+ repo
67
+ end
68
+ end
69
+ end