bio-publisci 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,17 +1,17 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module ORM
3
3
  class DataCube
4
- extend R2RDF::Dataset::DataCube
5
- extend R2RDF::Analyzer
6
- extend R2RDF::Metadata
7
- extend R2RDF::Query
8
- extend R2RDF::Parser
9
-
10
- include R2RDF::Dataset::DataCube
11
- include R2RDF::Analyzer
12
- include R2RDF::Metadata
13
- include R2RDF::Query
14
- include R2RDF::Parser
4
+ extend PubliSci::Dataset::DataCube
5
+ extend PubliSci::Analyzer
6
+ # extend PubliSci::Metadata
7
+ extend PubliSci::Query
8
+ extend PubliSci::Parser
9
+
10
+ include PubliSci::Dataset::DataCube
11
+ include PubliSci::Analyzer
12
+ include PubliSci::Metadata::Generator
13
+ include PubliSci::Query
14
+ include PubliSci::Parser
15
15
 
16
16
  attr_accessor :labels
17
17
  attr_accessor :dimensions
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module ORM
3
3
  class Observation
4
4
  attr_accessor :data
@@ -0,0 +1,31 @@
1
+ module PubliSci
2
+ class Dataset
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ interactive: false,
7
+ }
8
+ end
9
+
10
+ defaults.keys.each{|k|
11
+ default = defaults[k]
12
+ define_method(k) do |input=nil|
13
+ var = instance_variable_get :"@#{k}"
14
+ if var
15
+ var
16
+ else
17
+ instance_variable_set :"@#{k}", default
18
+ end
19
+
20
+ if input
21
+ instance_variable_set :"@#{k}", input
22
+ end
23
+
24
+ instance_variable_get :"@#{k}"
25
+ end
26
+
27
+ attr_writer k
28
+ }
29
+ end
30
+ end
31
+ end
@@ -5,10 +5,10 @@ class String
5
5
  end
6
6
  end
7
7
 
8
- module R2RDF
8
+ module PubliSci
9
9
  class Dataset
10
10
  module DataCube
11
- include R2RDF::Parser
11
+ include PubliSci::Parser
12
12
  def defaults
13
13
  {
14
14
  type: :dataframe,
@@ -60,6 +60,11 @@ module R2RDF
60
60
  [newm, newd, newc]
61
61
  end
62
62
 
63
+ def component_gen(args,options={})
64
+ args = Array[args].flatten
65
+ args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')}
66
+ end
67
+
63
68
  def encode_data(codes,data,var,options={})
64
69
  codes = sanitize(codes)
65
70
  new_data = {}
@@ -101,7 +106,7 @@ module R2RDF
101
106
  str = prefixes(var,options)
102
107
  str << data_structure_definition(measures, dimensions, codes, var, options)
103
108
  str << dataset(var, options)
104
- # component_specifications(measures, dimensions, var, options).map{ |c| str << c }
109
+ component_specifications(measures, dimensions, codes, var, options).map{ |c| str << c }
105
110
  dimension_properties(dimensions, codes, var, options).map{|p| str << p}
106
111
  measure_properties(measures, var, options).map{|p| str << p}
107
112
  code_lists(codes, data, var, options).map{|l| str << l}
@@ -138,14 +143,15 @@ module R2RDF
138
143
  var = sanitize([var]).first
139
144
  options = defaults().merge(options)
140
145
  rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
141
-
146
+ cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
147
+ cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
142
148
  str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
143
- rdf_dimensions.map{|d|
144
- str << " qb:component [ qb:dimension #{d} ] ;\n"
149
+ cs_dims.map{|d|
150
+ str << " qb:component #{d} ;\n"
145
151
  }
146
152
 
147
- rdf_measures.map{|m|
148
- str << " qb:component [ qb:measure #{m} ] ;\n"
153
+ cs_meas.map{|m|
154
+ str << " qb:component #{m} ;\n"
149
155
  }
150
156
  str[-2]='.'
151
157
  str<<"\n"
@@ -163,24 +169,29 @@ module R2RDF
163
169
  EOF
164
170
  end
165
171
 
166
- def component_specifications(measure_names, dimension_names, var, options={})
172
+ def component_specifications(measure_names, dimension_names, codes, var, options={})
167
173
  options = defaults().merge(options)
174
+ rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
175
+ cs_dims = component_gen(rdf_dimensions,options)
176
+ cs_meas = component_gen(rdf_measures,options)
177
+ # cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
178
+ # cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')}
168
179
  specs = []
169
180
 
170
- dimension_names.map{|d|
181
+ rdf_dimensions.each_with_index.map{|d,i|
171
182
  specs << <<-EOF.unindent
172
- cs:#{d} a qb:ComponentSpecification ;
173
- rdfs:label "#{d} Component" ;
174
- qb:dimension prop:#{d} .
183
+ #{cs_dims[i]} a qb:ComponentSpecification ;
184
+ rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ;
185
+ qb:dimension #{d} .
175
186
 
176
187
  EOF
177
188
  }
178
189
 
179
- measure_names.map{|n|
190
+ rdf_measures.each_with_index.map{|n,i|
180
191
  specs << <<-EOF.unindent
181
- cs:#{n} a qb:ComponentSpecification ;
182
- rdfs:label "#{n} Component" ;
183
- qb:measure prop:#{n} .
192
+ #{cs_meas[i]} a qb:ComponentSpecification ;
193
+ rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ;
194
+ qb:measure #{n} .
184
195
 
185
196
  EOF
186
197
  }
@@ -0,0 +1,11 @@
1
+ module PubliSci
2
+ class Dataset
3
+ extend PubliSci::Interactive
4
+ extend PubliSci::Registry
5
+
6
+ def self.configuration
7
+ @config ||= Dataset::Configuration.new
8
+ end
9
+
10
+ end
11
+ end
@@ -1,6 +1,7 @@
1
- module R2RDF
1
+ require 'open-uri'
2
+ module PubliSci
2
3
  class Dataset
3
- extend R2RDF::Interactive
4
+ extend PubliSci::Interactive
4
5
 
5
6
  def self.for(object, options={}, ask_on_ambiguous=true)
6
7
 
@@ -23,10 +24,12 @@ module R2RDF
23
24
  when ".RData"
24
25
  r_object(object, options, ask_on_ambiguous)
25
26
  when /.csv/i
26
- R2RDF::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
27
+ PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
27
28
  end
29
+ elsif object =~ %r{http[s]://.+}
30
+ self.for(download(object).path, options, ask_on_ambiguous)
28
31
  else
29
- raise "Unable to find reader for File or String"
32
+ raise "Unable to find reader for File or String #{object}"
30
33
  end
31
34
  elsif object.is_a? Rserve::REXP
32
35
  r_object(object, options, ask_on_ambiguous)
@@ -35,6 +38,13 @@ module R2RDF
35
38
  end
36
39
  end
37
40
 
41
+ def self.download(uri)
42
+ out = Tempfile.new(uri.split('/').last)
43
+ out.write open(uri).read
44
+ out.close
45
+ out
46
+ end
47
+
38
48
  def self.r_object(object, options={}, ask_on_ambiguous=true)
39
49
  if object.is_a? String
40
50
  con = Rserve::Connection.new
@@ -49,7 +59,7 @@ module R2RDF
49
59
  r_classes = con.eval("class(#{var})").to_ruby
50
60
 
51
61
  if r_classes.include? "data.frame"
52
- df = R2RDF::Reader::Dataframe.new
62
+ df = PubliSci::Reader::Dataframe.new
53
63
  unless options[:dimensions] || !ask_on_ambiguous
54
64
  dims = con.eval("names(#{var})").to_ruby
55
65
  puts "Which dimensions? #{dims}"
@@ -70,7 +80,7 @@ module R2RDF
70
80
  df.generate_n3(con.eval(var),var,options)
71
81
 
72
82
  elsif r_classes.include? "cross"
73
- bc = R2RDF::Reader::RCross.new
83
+ bc = PubliSci::Reader::RCross.new
74
84
 
75
85
  unless options[:measures] || !ask_on_ambiguous
76
86
  pheno_names = con.eval("names(#{var}$pheno)").to_ruby
@@ -91,7 +101,7 @@ module R2RDF
91
101
  bc.generate_n3(con, var, base, options)
92
102
 
93
103
  elsif r_classes.include? "matrix"
94
- mat = R2RDF::Reader::RMatrix.new
104
+ mat = PubliSci::Reader::RMatrix.new
95
105
 
96
106
  unless options[:measures] || !ask_on_ambiguous
97
107
  puts "Row label"
@@ -118,13 +128,13 @@ module R2RDF
118
128
 
119
129
  mat.generate_n3(con, var, base, options)
120
130
  else
121
- raise "no R2RDF::Reader found for #{r_classes}"
131
+ raise "no PubliSci::Reader found for #{r_classes}"
122
132
  end
123
133
 
124
134
  elsif object.is_a? Rserve::REXP
125
135
  if object.attr.payload["class"].payload.first
126
136
 
127
- df = R2RDF::Reader::Dataframe.new
137
+ df = PubliSci::Reader::Dataframe.new
128
138
 
129
139
  var = nil
130
140
 
@@ -1,4 +1,4 @@
1
- module R2RDF
1
+ module PubliSci
2
2
  module Interactive
3
3
  #to be called by other classes if user input is required
4
4
 
@@ -0,0 +1,34 @@
1
+ module PubliSci
2
+ module DSL
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ output: :generate_n3,
7
+ abbreviate: false,
8
+ repository: :in_memory,
9
+ repository_url: 'http://localhost:8080/'
10
+ }
11
+ end
12
+
13
+ defaults.keys.each{|k|
14
+ default = defaults[k]
15
+ define_method(k) do |input=nil|
16
+ var = instance_variable_get :"@#{k}"
17
+ if var
18
+ var
19
+ else
20
+ instance_variable_set :"@#{k}", default
21
+ end
22
+
23
+ if input
24
+ instance_variable_set :"@#{k}", input
25
+ end
26
+
27
+ instance_variable_get :"@#{k}"
28
+ end
29
+
30
+ attr_writer k
31
+ }
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,91 @@
1
+ module PubliSci
2
+ class Dataset
3
+ module DSL
4
+
5
+ class Instance
6
+ include Dataset::DSL
7
+
8
+ def initialize
9
+ Dataset.registry.clear
10
+ end
11
+ end
12
+
13
+ # def interactive(value=nil)
14
+ # set_or_get('interactive',value)
15
+ # end
16
+
17
+ def object(file=nil)
18
+ set_or_get('object',file)
19
+ end
20
+ alias_method :source, :object
21
+
22
+ def dimension(*args)
23
+ if args.size == 0
24
+ add_or_get('dimension',nil)
25
+ else
26
+ args.each{|arg|
27
+ add_or_get('dimension',arg)
28
+ }
29
+ end
30
+ end
31
+
32
+ def measure(*args)
33
+ if args.size == 0
34
+ add_or_get('measure',nil)
35
+ else
36
+ args.each{|arg|
37
+ add_or_get('measure',arg)
38
+ }
39
+ end
40
+ end
41
+
42
+ def option(opt=nil,value=nil)
43
+ if opt == nil || value == nil
44
+ @dataset_generator_options
45
+ else
46
+ (@dataset_generator_options ||= {})[opt] = value
47
+ end
48
+ end
49
+ alias_method :options, :option
50
+
51
+ def settings
52
+ Dataset.configuration
53
+ end
54
+
55
+ def generate_n3
56
+ opts = {}
57
+ %w{dimension measure}.each{|field|
58
+ opts[field.to_sym] = send(field.to_sym) if send(field.to_sym)
59
+ }
60
+ interact = settings.interactive
61
+ if options
62
+ opts = opts.merge(options)
63
+ end
64
+ Dataset.for(object,opts,interact)
65
+ end
66
+
67
+ private
68
+ def set_or_get(var,input=nil)
69
+ ivar = instance_variable_get("@#{var}")
70
+
71
+ if input
72
+ instance_variable_set("@#{var}", input)
73
+ else
74
+ ivar
75
+ end
76
+ end
77
+
78
+ def add_or_get(var,input)
79
+ ivar = instance_variable_get("@#{var}")
80
+
81
+ if input
82
+ instance_variable_set("@#{var}", []) unless ivar
83
+ instance_variable_get("@#{var}") << input
84
+ instance_variable_get("@#{var}")
85
+ else
86
+ ivar
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,69 @@
1
+ module PubliSci
2
+ module DSL
3
+ attr_reader :base_url
4
+
5
+ # Use to set base url for whole script; helps when referring to dataset
6
+ # resources from metadata and
7
+ def base_url=(url)
8
+ @base_url = url
9
+ Prov.base_url=url
10
+ end
11
+
12
+ def data(&block)
13
+ inst=PubliSci::Dataset::DSL::Instance.new
14
+ inst.instance_eval(&block)
15
+ @_dsl_data ||= [] << inst
16
+ inst
17
+ end
18
+
19
+ def metadata(&block)
20
+ inst=PubliSci::Metadata::DSL::Instance.new
21
+ inst.instance_eval(&block)
22
+ @_dsl_metadata = inst
23
+ inst
24
+ end
25
+
26
+ def provenance(&block)
27
+ inst=PubliSci::Prov::DSL::Instance.new
28
+ inst.instance_eval(&block)
29
+ @_dsl_prov = inst
30
+ inst
31
+ end
32
+
33
+ def configuration
34
+ @_dsl_config ||= DSL::Configuration.new
35
+ end
36
+
37
+ def configure
38
+ yield configuration
39
+ end
40
+
41
+ def settings
42
+ configuration
43
+ end
44
+
45
+ def generate_n3
46
+ out = ""
47
+ @_dsl_data.each{|dat| out << dat.generate_n3 } if @_dsl_data
48
+ out << @_dsl_metadata.generate_n3 if @_dsl_metadata
49
+ out << @_dsl_prov.generate_n3 if @_dsl_prov
50
+ out
51
+ end
52
+
53
+ def to_repository(turtle_string=generate_n3)
54
+ repo = settings.repository
55
+ case repo
56
+ when :in_memory
57
+ repo = RDF::Repository.new
58
+ when :fourstore
59
+ repo = RDF::FourStore::Repository.new('http://localhost:8080')
60
+ end
61
+ f=Tempfile.new(['repo','.ttl'])
62
+ f.write(turtle_string)
63
+ f.close
64
+ repo.load(f.path, :format => :ttl).to_s
65
+ f.unlink
66
+ repo
67
+ end
68
+ end
69
+ end