bio-publisci 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +5 -5
  4. data/bin/bio-publisci +34 -11
  5. data/examples/bio-band_integration.rb +9 -0
  6. data/examples/no_magic.prov +40 -0
  7. data/examples/primer.prov +28 -0
  8. data/examples/prov_dsl.prov +51 -0
  9. data/features/create_generator.feature +5 -9
  10. data/features/integration_steps.rb +8 -8
  11. data/features/metadata.feature +15 -2
  12. data/features/metadata_steps.rb +21 -0
  13. data/features/orm_steps.rb +5 -5
  14. data/features/prov_dsl.feature +14 -0
  15. data/features/prov_dsl_steps.rb +11 -0
  16. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
  17. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
  18. data/lib/bio-publisci/dataset/data_cube.rb +30 -26
  19. data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
  20. data/lib/bio-publisci/metadata/metadata.rb +180 -42
  21. data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
  22. data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
  23. data/lib/bio-publisci/metadata/prov/association.rb +73 -0
  24. data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
  25. data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
  26. data/lib/bio-publisci/metadata/prov/element.rb +52 -0
  27. data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
  28. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  29. data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
  30. data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
  31. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  32. data/lib/bio-publisci/output.rb +27 -0
  33. data/lib/bio-publisci/parser.rb +17 -8
  34. data/lib/bio-publisci/readers/csv.rb +9 -7
  35. data/lib/bio-publisci/readers/dataframe.rb +9 -8
  36. data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
  37. data/lib/bio-publisci/readers/r_matrix.rb +37 -13
  38. data/lib/bio-publisci/spira.rb +82 -0
  39. data/lib/bio-publisci/writers/dataframe.rb +65 -65
  40. data/lib/bio-publisci.rb +9 -4
  41. data/spec/ORM/data_cube_orm_spec.rb +3 -3
  42. data/spec/dataset_for_spec.rb +29 -0
  43. data/spec/generators/r_cross_spec.rb +51 -0
  44. data/spec/generators/r_matrix_spec.rb +14 -5
  45. metadata +42 -8
  46. data/lib/bio-publisci/readers/cross.rb +0 -72
@@ -1,241 +1,239 @@
1
1
  module R2RDF
2
- class Dataset
3
- module ORM
4
- class DataCube
5
- extend R2RDF::Dataset::DataCube
6
- extend R2RDF::Analyzer
7
- extend R2RDF::Metadata
8
- extend R2RDF::Query
9
- extend R2RDF::Parser
10
-
11
- include R2RDF::Dataset::DataCube
12
- include R2RDF::Analyzer
13
- include R2RDF::Metadata
14
- include R2RDF::Query
15
- include R2RDF::Parser
16
-
17
- attr_accessor :labels
18
- attr_accessor :dimensions
19
- attr_accessor :measures
20
- attr_accessor :obs
21
- attr_accessor :meta
22
-
23
- def initialize(options={},do_parse = true)
24
- @dimensions = {}
25
- @measures = []
26
- @obs = []
27
- @generator_options = {}
28
- @options = {}
29
-
30
- @meta = {}
31
-
32
- parse_options options if do_parse
33
- end
34
-
35
- def self.load(graph,options={},verbose=false)
36
-
37
-
38
- graph = create_graph(graph) unless graph =~ /^http/
39
-
40
- # puts get_hashes(execute_from_file('dimension_ranges.rq',graph))
41
- dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
42
- #TODO coded properties should be found via SPARQL queries
43
- if solution[:range].split('/')[-2] == "code"
44
- type = :coded
45
- else
46
- type = solution[:range].to_s
47
- end
48
- [solution[:dimension], {type: type}]
49
- }]
50
- puts "dimensions: #{dimensions}" if verbose
51
-
52
- codes = execute_from_file('code_resources.rq',graph).to_h.map{|sol|
53
- [sol[:dimension].to_s, sol[:codeList].to_s, sol[:class].to_s]
2
+ module ORM
3
+ class DataCube
4
+ extend R2RDF::Dataset::DataCube
5
+ extend R2RDF::Analyzer
6
+ extend R2RDF::Metadata
7
+ extend R2RDF::Query
8
+ extend R2RDF::Parser
9
+
10
+ include R2RDF::Dataset::DataCube
11
+ include R2RDF::Analyzer
12
+ include R2RDF::Metadata
13
+ include R2RDF::Query
14
+ include R2RDF::Parser
15
+
16
+ attr_accessor :labels
17
+ attr_accessor :dimensions
18
+ attr_accessor :measures
19
+ attr_accessor :obs
20
+ attr_accessor :meta
21
+
22
+ def initialize(options={},do_parse = true)
23
+ @dimensions = {}
24
+ @measures = []
25
+ @obs = []
26
+ @generator_options = {}
27
+ @options = {}
28
+
29
+ @meta = {}
30
+
31
+ parse_options options if do_parse
32
+ end
33
+
34
+ def self.load(graph,options={},verbose=false)
35
+
36
+
37
+ graph = create_graph(graph) unless graph =~ /^http/
38
+
39
+ # puts get_hashes(execute_from_file('dimension_ranges.rq',graph))
40
+ dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
41
+ #TODO coded properties should be found via SPARQL queries
42
+ if solution[:range].split('/')[-2] == "code"
43
+ type = :coded
44
+ else
45
+ type = solution[:range].to_s
46
+ end
47
+ [solution[:dimension], {type: type}]
48
+ }]
49
+ puts "dimensions: #{dimensions}" if verbose
50
+
51
+ codes = execute_from_file('code_resources.rq',graph).to_h.map{|sol|
52
+ [sol[:dimension].to_s, sol[:codeList].to_s, sol[:class].to_s]
53
+ }
54
+ puts "codes: #{codes}" if verbose
55
+
56
+ measures = execute_from_file('measures.rq',graph).to_h.map{|m| m[:measure].to_s}
57
+ puts "measures: #{measures}" if verbose
58
+
59
+ name = execute_from_file('dataset.rq',graph).to_h.first[:label]
60
+ puts "dataset: #{name}" if verbose
61
+
62
+ obs = execute_from_file('observations.rq',graph)
63
+ observations = observation_hash(obs)
64
+ puts "observations: #{observations}" if verbose
65
+
66
+ # simple_observations = observation_hash(obs,true)
67
+
68
+ labels = execute_from_file('observation_labels.rq', graph)
69
+ labels = Hash[labels.map{|sol|
70
+ [sol[:observation].to_s, sol[:label].to_s]
71
+ }]
72
+
73
+ new_opts = {
74
+ measures: measures,
75
+ dimensions: dimensions,
76
+ observations: observations.values,
77
+ name: name,
78
+ labels: labels.values,
79
+ codes: codes
80
+ }
81
+
82
+ options = options.merge(new_opts)
83
+ puts "creating #{options}" if verbose
84
+ self.new(options)
85
+ end
86
+
87
+ def parse_options(options)
88
+ if options[:dimensions]
89
+ options[:dimensions].each{|name,details|
90
+ add_dimension(name, details[:type] || :coded)
54
91
  }
55
- puts "codes: #{codes}" if verbose
56
-
57
- measures = execute_from_file('measures.rq',graph).to_h.map{|m| m[:measure].to_s}
58
- puts "measures: #{measures}" if verbose
59
-
60
- name = execute_from_file('dataset.rq',graph).to_h.first[:label]
61
- puts "dataset: #{name}" if verbose
62
-
63
- obs = execute_from_file('observations.rq',graph)
64
- observations = observation_hash(obs)
65
- puts "observations: #{observations}" if verbose
66
-
67
- # simple_observations = observation_hash(obs,true)
68
-
69
- labels = execute_from_file('observation_labels.rq', graph)
70
- labels = Hash[labels.map{|sol|
71
- [sol[:observation].to_s, sol[:label].to_s]
72
- }]
73
-
74
- new_opts = {
75
- measures: measures,
76
- dimensions: dimensions,
77
- observations: observations.values,
78
- name: name,
79
- labels: labels.values,
80
- codes: codes
81
- }
82
-
83
- options = options.merge(new_opts)
84
- puts "creating #{options}" if verbose
85
- self.new(options)
86
- end
87
-
88
- def parse_options(options)
89
- if options[:dimensions]
90
- options[:dimensions].each{|name,details|
91
- add_dimension(name, details[:type] || :coded)
92
- }
93
- end
94
-
95
- if options[:measures]
96
- options[:measures].each{|m| @measures << m}
97
- end
98
-
99
- if options[:observations]
100
- options[:observations].each{|obs_data| add_observation obs_data}
101
- end
102
-
103
- @generator_options = options[:generator_options] if options[:generator_options]
104
- @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]
105
-
106
- if options[:name]
107
- @name = options[:name]
108
- else
109
- raise "No dataset name specified!"
110
- end
111
-
112
- if options[:validate_each]
113
- @options[:validate_each] = options[:validate_each]
114
- end
115
-
116
- if options[:labels]
117
- @labels = options[:labels]
92
+ end
93
+
94
+ if options[:measures]
95
+ options[:measures].each{|m| @measures << m}
96
+ end
97
+
98
+ if options[:observations]
99
+ options[:observations].each{|obs_data| add_observation obs_data}
100
+ end
101
+
102
+ @generator_options = options[:generator_options] if options[:generator_options]
103
+ @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]
104
+
105
+ if options[:name]
106
+ @name = options[:name]
107
+ else
108
+ raise "No dataset name specified!"
109
+ end
110
+
111
+ if options[:validate_each]
112
+ @options[:validate_each] = options[:validate_each]
113
+ end
114
+
115
+ if options[:labels]
116
+ @labels = options[:labels]
117
+ end
118
+
119
+ if options[:codes]
120
+ @codes = options[:codes]
121
+ end
122
+ end
123
+
124
+ def to_n3
125
+
126
+ #create labels if not specified
127
+ unless @labels.is_a?(Array) && @labels.size == @obs.size
128
+ if @labels.is_a? Symbol
129
+ #define some automatic labeling methods
130
+ else
131
+ @labels = (1..@obs.size).to_a.map(&:to_s)
118
132
  end
133
+ end
134
+ data = {}
119
135
 
120
- if options[:codes]
121
- @codes = options[:codes]
122
- end
123
- end
124
-
125
- def to_n3
126
-
127
- #create labels if not specified
128
- unless @labels.is_a?(Array) && @labels.size == @obs.size
129
- if @labels.is_a? Symbol
130
- #define some automatic labeling methods
131
- else
132
- @labels = (1..@obs.size).to_a.map(&:to_s)
133
- end
134
- end
135
- data = {}
136
-
137
-
138
- #collect observation data
139
- check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
140
- @obs.map{|obs|
141
- (@measures | @dimensions.keys).map{ |component|
142
- (data[component] ||= []) << obs.data[component]
143
- }
144
- }
145
-
146
-
147
- @codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact unless @codes
148
- str = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
149
- unless @options[:skip_metadata]
150
- fields = {
151
- publishers: publishers(),
152
- subject: subjects(),
153
- author: author(),
154
- description: description(),
155
- date: date(),
156
- var: @name,
157
- }
158
- # puts basic(fields,@generator_options)
159
- str += "\n" + basic(fields,@generator_options)
160
- end
161
- str
162
- end
163
-
164
- def add_dimension(name, type=:coded)
165
- @dimensions[name.to_s] = {type: type}
166
- end
167
-
168
- def add_measure(name)
169
- @measures << name
170
- end
171
-
172
- def add_observation(data)
173
- data = Hash[data.map{|k,v| [k.to_s, v]}]
174
- obs = Observation.new(data)
175
- check_integrity([obs.data],@dimensions.keys,@measures) if @options[:validate_each]
176
- @obs << obs
177
- end
178
-
179
- def insert(observation)
180
- @obs << observation
181
- end
182
-
183
- def publishers
184
- @meta[:publishers] ||= []
185
- end
186
-
187
- def publishers=(publishers)
188
- @meta[:publishers] = publishers
189
- end
190
-
191
- def subjects
192
- @meta[:subject] ||= []
193
- end
194
-
195
- def subjects=(subjects)
196
- @meta[:subject]=subjects
197
- end
198
-
199
- def add_publisher(label,uri)
200
- publishers << {label: label, uri: uri}
201
- end
202
-
203
- def add_subject(id)
204
- subject << id
205
- end
206
-
207
- def author
208
- @meta[:creator] ||= ""
209
- end
210
-
211
- def author=(author)
212
- @meta[:creator] = author
213
- end
214
-
215
- def description
216
- @meta[:description] ||= ""
217
- end
218
-
219
- def description=(description)
220
- @meta[:description] = description
221
- end
222
-
223
- def date
224
- @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
225
- end
226
-
227
- def date=(date)
228
- @meta[:date] = date
229
- end
230
-
231
- def to_h
232
- {
233
- measures: @measures,
234
- dimensions: @dimensions,
235
- observations: @obs.map{|o| o.data}
236
- }
237
- end
238
- end
239
- end
240
- end
136
+
137
+ #collect observation data
138
+ check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
139
+ @obs.map{|obs|
140
+ (@measures | @dimensions.keys).map{ |component|
141
+ (data[component] ||= []) << obs.data[component]
142
+ }
143
+ }
144
+
145
+
146
+ @codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact unless @codes
147
+ str = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
148
+ unless @options[:skip_metadata]
149
+ fields = {
150
+ publishers: publishers(),
151
+ subject: subjects(),
152
+ author: author(),
153
+ description: description(),
154
+ date: date(),
155
+ var: @name,
156
+ }
157
+ # puts basic(fields,@generator_options)
158
+ str += "\n" + basic(fields)
159
+ end
160
+ str
161
+ end
162
+
163
+ def add_dimension(name, type=:coded)
164
+ @dimensions[name.to_s] = {type: type}
165
+ end
166
+
167
+ def add_measure(name)
168
+ @measures << name
169
+ end
170
+
171
+ def add_observation(data)
172
+ data = Hash[data.map{|k,v| [k.to_s, v]}]
173
+ obs = Observation.new(data)
174
+ check_integrity([obs.data],@dimensions.keys,@measures) if @options[:validate_each]
175
+ @obs << obs
176
+ end
177
+
178
+ def insert(observation)
179
+ @obs << observation
180
+ end
181
+
182
+ def publishers
183
+ @meta[:publishers] ||= []
184
+ end
185
+
186
+ def publishers=(publishers)
187
+ @meta[:publishers] = publishers
188
+ end
189
+
190
+ def subjects
191
+ @meta[:subject] ||= []
192
+ end
193
+
194
+ def subjects=(subjects)
195
+ @meta[:subject]=subjects
196
+ end
197
+
198
+ def add_publisher(label,uri)
199
+ publishers << {label: label, uri: uri}
200
+ end
201
+
202
+ def add_subject(id)
203
+ subject << id
204
+ end
205
+
206
+ def author
207
+ @meta[:creator] ||= ""
208
+ end
209
+
210
+ def author=(author)
211
+ @meta[:creator] = author
212
+ end
213
+
214
+ def description
215
+ @meta[:description] ||= ""
216
+ end
217
+
218
+ def description=(description)
219
+ @meta[:description] = description
220
+ end
221
+
222
+ def date
223
+ @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
224
+ end
225
+
226
+ def date=(date)
227
+ @meta[:date] = date
228
+ end
229
+
230
+ def to_h
231
+ {
232
+ measures: @measures,
233
+ dimensions: @dimensions,
234
+ observations: @obs.map{|o| o.data}
235
+ }
236
+ end
237
+ end
238
+ end
241
239
  end
@@ -1,8 +1,7 @@
1
1
  module R2RDF
2
- class Dataset
3
2
  module ORM
4
3
  class Observation
5
- attr_accessor :data
4
+ attr_accessor :data
6
5
  def initialize(data={})
7
6
  @data = data
8
7
  end
@@ -15,6 +14,5 @@ module R2RDF
15
14
 
16
15
  end
17
16
  end
18
- end
19
17
  end
20
18
  end
@@ -1,4 +1,4 @@
1
- #monkey patch to make rdf string w/ heredocs prettier ;)
1
+ #monkey patch to make rdf string w/ heredocs prettier ;)
2
2
  class String
3
3
  def unindent
4
4
  gsub /^#{self[/\A\s*/]}/, ''
@@ -25,7 +25,7 @@ module R2RDF
25
25
  m
26
26
  else
27
27
  "prop:#{m}"
28
- end
28
+ end
29
29
  }
30
30
 
31
31
  newc = []
@@ -54,13 +54,14 @@ module R2RDF
54
54
  }
55
55
  else
56
56
  newc = codes.map{|c|
57
- ["#{c}","code:#{c.downcase}","code:#{c.downcase.capitalize}"]
57
+ ["#{sanitize(c).first}","code:#{sanitize(c).first.downcase}","code:#{sanitize(c).first.downcase.capitalize}"]
58
58
  }
59
59
  end
60
60
  [newm, newd, newc]
61
61
  end
62
62
 
63
63
  def encode_data(codes,data,var,options={})
64
+ codes = sanitize(codes)
64
65
  new_data = {}
65
66
  data.map{|k,v|
66
67
  if codes.include? k
@@ -89,7 +90,7 @@ module R2RDF
89
90
  nil
90
91
  end
91
92
  end
92
-
93
+
93
94
  def generate(measures, dimensions, codes, data, observation_labels, var, options={})
94
95
  # dimensions = sanitize(dimensions)
95
96
  # codes = sanitize(codes)
@@ -154,7 +155,7 @@ module R2RDF
154
155
  def dataset(var,options={})
155
156
  var = sanitize([var]).first
156
157
  options = defaults().merge(options)
157
- <<-EOF.unindent
158
+ <<-EOF.unindent
158
159
  ns:dataset-#{var} a qb:DataSet ;
159
160
  rdfs:label "#{var}"@en ;
160
161
  qb:structure ns:dsd-#{var} .
@@ -165,7 +166,7 @@ module R2RDF
165
166
  def component_specifications(measure_names, dimension_names, var, options={})
166
167
  options = defaults().merge(options)
167
168
  specs = []
168
-
169
+
169
170
  dimension_names.map{|d|
170
171
  specs << <<-EOF.unindent
171
172
  cs:#{d} a qb:ComponentSpecification ;
@@ -183,7 +184,7 @@ module R2RDF
183
184
 
184
185
  EOF
185
186
  }
186
-
187
+
187
188
  specs
188
189
  end
189
190
 
@@ -192,10 +193,10 @@ module R2RDF
192
193
  rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)
193
194
  props = []
194
195
 
195
- dimension_codes = rdf_codes.map{|c|
196
+ dimension_codes = rdf_codes.map{|c|
196
197
  if c[0]=~/^<http:/
197
- c[0][1..-2]
198
- else
198
+ c[0][1..-2]
199
+ else
199
200
  c[0]
200
201
  end
201
202
  }
@@ -218,7 +219,7 @@ module R2RDF
218
219
  EOF
219
220
  end
220
221
  }
221
-
222
+
222
223
  props
223
224
  end
224
225
 
@@ -226,43 +227,45 @@ module R2RDF
226
227
  options = defaults().merge(options)
227
228
  rdf_measures = generate_resources(measures, [], [], options)[0]
228
229
  props = []
229
-
230
+
230
231
  rdf_measures.map{ |m|
231
-
232
+
232
233
  props << <<-EOF.unindent
233
234
  #{m} a rdf:Property, qb:MeasureProperty ;
234
235
  rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .
235
236
 
236
237
  EOF
237
238
  }
238
-
239
+
239
240
  props
240
241
  end
241
242
 
242
- def observations(measures, dimensions, codes, data, observation_labels, var, options={})
243
+ def observations(measures, dimensions, codes, data, observation_labels, var, options={})
243
244
  var = sanitize([var]).first
245
+ measures = sanitize(measures)
246
+ dimensions = sanitize(dimensions)
244
247
  options = defaults().merge(options)
245
248
  rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
246
249
  data = encode_data(codes, data, var, options)
247
250
  obs = []
248
-
249
- dimension_codes = rdf_codes.map{|c|
251
+
252
+ dimension_codes = rdf_codes.map{|c|
250
253
  if c[0]=~/^<http:/
251
- c[0][1..-2]
252
- else
254
+ c[0][1..-2]
255
+ else
253
256
  c[0]
254
257
  end
255
258
  }
256
259
 
257
260
  observation_labels.each_with_index.map{|r, i|
258
261
  contains_nulls = false
259
- str = <<-EOF.unindent
262
+ str = <<-EOF.unindent
260
263
  ns:obs#{r} a qb:Observation ;
261
264
  qb:dataSet ns:dataset-#{var} ;
262
265
  EOF
263
266
 
264
267
  str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
265
-
268
+
266
269
  dimensions.each_with_index{|d,j|
267
270
  contains_nulls = contains_nulls | (data[d][i] == nil)
268
271
 
@@ -276,8 +279,8 @@ module R2RDF
276
279
 
277
280
  measures.each_with_index{|m,j|
278
281
  contains_nulls = contains_nulls | (data[m][i] == nil)
279
- str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
280
-
282
+ str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
283
+
281
284
  }
282
285
 
283
286
  str << " .\n\n"
@@ -288,7 +291,7 @@ module R2RDF
288
291
  puts "missing component for observation, skipping: #{str}, "
289
292
  end
290
293
  else
291
- obs << str
294
+ obs << str
292
295
  end
293
296
  }
294
297
  obs
@@ -323,11 +326,11 @@ module R2RDF
323
326
  str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
324
327
  end
325
328
  }
326
-
329
+
327
330
  str << " .\n\n"
328
331
  lists << str
329
332
  }
330
-
333
+
331
334
 
332
335
  lists
333
336
  end
@@ -362,6 +365,7 @@ module R2RDF
362
365
 
363
366
  def abbreviate_known(turtle_string)
364
367
  #debug method
368
+ # puts turtle_string
365
369
  turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
366
370
  end
367
371
  end