bio-publisci 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -0,0 +1,85 @@
1
module PubliSci
  class Metadata
    # Mixin offering a small DSL for describing a dataset's metadata
    # (variable name, creator, title, description, topics, publishers) and
    # rendering it through PubliSci::Metadata::Generator#basic.
    #
    # Every accessor doubles as a setter: called with an argument it stores
    # the value, called without one it returns what was stored (or nil).
    module DSL

      # Stand-alone object exposing the DSL; clears Metadata.registry on creation.
      class Instance
        include Metadata::DSL

        def initialize
          Metadata.registry.clear
        end
      end

      # Gets or sets the dataset variable name (stored in @var).
      def var(name=nil)
        set_or_get('var',name)
      end
      alias_method :dataset, :var

      # Gets or sets the creator (stored in @creator).
      def creator(id=nil)
        set_or_get('creator',id)
      end
      alias_method :name, :creator

      # Gets or sets the description (stored in @description).
      def description(desc=nil)
        set_or_get('description',desc)
      end

      # Gets or sets the title (stored in @title).
      def title(desc=nil)
        set_or_get('title',desc)
      end

      # Appends a topic to the @topic list, or returns the list when called
      # without an argument (nil if nothing was added yet).
      def topic(sub=nil)
        add_or_get('topic',sub)
      end

      # With a block: builds a Publisher, evaluates the block in its context,
      # appends it to @publishers and returns the new Publisher.
      # Without a block: appends +pub+ to @publishers, or returns the list
      # when +pub+ is nil.
      def publishers(pub=nil,&block)
        if block_given?
          publisher = Publisher.new
          publisher.instance_eval(&block)
          # Parenthesised on purpose: `@publishers ||= [] << x` binds as
          # `@publishers ||= ([] << x)` and silently drops every publisher
          # after the first one.
          (@publishers ||= []) << publisher
          publisher
        else
          add_or_get('publishers',pub)
        end
      end
      alias_method :publisher, :publishers

      # Collects the fields that were set into an options hash and hands it
      # to Generator#basic, returning whatever that method returns.
      # Each entry of the publishers list must respond to #label and #uri.
      def generate_n3
        opts = {}
        %w{var creator description title}.each do |field|
          value = send(field)
          opts[field.to_sym] = value if value
        end
        opts[:subject] = topic if topic

        listed = publishers
        if listed && !listed.empty?
          opts[:publishers] = listed.map { |pub| {label: pub.label, uri: pub.uri} }
        end

        gen = Class.new {include PubliSci::Metadata::Generator}
        gen.new.basic(opts)
      end

      private

      # Stores +input+ in @<var> when given (truthy) and returns it; otherwise
      # returns the current value of @<var>.
      def set_or_get(var,input=nil)
        ivar = instance_variable_get("@#{var}")

        if input
          instance_variable_set("@#{var}", input)
        else
          ivar
        end
      end

      # Appends +input+ to the array held in @<var> (creating it on first
      # use) and returns the array; with a nil +input+ returns the current
      # value of @<var> untouched.
      def add_or_get(var,input)
        ivar = instance_variable_get("@#{var}")

        if input
          instance_variable_set("@#{var}", []) unless ivar
          instance_variable_get("@#{var}") << input
          instance_variable_get("@#{var}")
        else
          ivar
        end
      end
    end
  end
end
@@ -1,19 +1,28 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  module DSL
4
4
 
5
5
  include PubliSci::Vocabulary
6
6
 
7
# Stand-alone host for the provenance DSL. Creating one empties the shared
# Prov registry and resets the Prov settings.
class Instance
  include Prov::DSL

  def initialize
    Prov.registry.clear
    Prov.reset_settings
  end
end
14
14
 
15
15
  def self.included(mod)
16
16
  Prov.registry.clear
17
+ Prov.reset_settings
18
+ end
19
+
20
+ # def configure(&block)
21
+ # Prov.configuration.instance_eval(&block)
22
+ # end
23
+
24
# Yields the shared Prov configuration object to the given block and
# returns the block's result.
def configure
  config = Prov.configuration
  yield config
end
18
27
 
19
28
  def named_element(name,element_class,args={},&block)
@@ -30,7 +39,7 @@ module PubliSci
30
39
  end
31
40
  end
32
41
 
33
# Declares (or looks up) a provenance agent by delegating to named_element
# with Prov::Agent as the element class; +args+ and the block are passed
# through unchanged.
def agent(name, args={}, &block)
  named_element(name, Prov::Agent, args, &block)
end
36
45
 
@@ -75,18 +84,23 @@ module PubliSci
75
84
  end
76
85
  end
77
86
 
87
# Returns the shared Prov configuration object.
def settings
  Prov.configuration
end
90
+
78
91
  def return_objects
79
92
  Prov.registry
80
93
  end
81
94
 
82
- def to_repository(repo=:in_memory,turtle_string=(Prov.prefixes+generate_n3))
95
+ def to_repository(turtle_string=(Prov.prefixes+generate_n3))
96
+ repo = settings.repository
83
97
  case repo
84
98
  when :in_memory
85
99
  repo = RDF::Repository.new
86
100
  when :fourstore
87
101
  repo = RDF::FourStore::Repository.new('http://localhost:8080')
88
102
  end
89
- f = Tempfile.new('repo')
103
+ f = Tempfile.new(['repo','.ttl'])
90
104
  f.write(turtle_string)
91
105
  f.close
92
106
  repo.load(f.path, :format => :ttl)
@@ -94,6 +108,16 @@ module PubliSci
94
108
  repo
95
109
  end
96
110
 
111
# Produces output according to the configured Prov output mode:
#   :generate_n3   -> result of generate_n3(abbreviate setting)
#   :to_repository -> raises RuntimeError ("not implemented yet")
# Any other mode yields nil.
def output
  cfg = Prov.configuration
  mode = cfg.output

  if :generate_n3 == mode
    generate_n3(cfg.abbreviate)
  elsif :to_repository == mode
    raise "not implemented yet"
  end
end
120
+
97
121
  private
98
122
  def try_auto_set(object,method,args)
99
123
  if object.methods.include? method
@@ -0,0 +1,323 @@
1
class String
  # Returns a copy of the string in which the whitespace found at the very
  # start of the string has been removed from the beginning of every line
  # that starts with it. Used to keep indented heredocs flush-left.
  def unindent
    margin = self[/\A\s*/]
    gsub(/^#{Regexp.escape(margin)}/, '')
  end
end
6
+
7
+ module PubliSci
8
+ class Metadata
9
+ module Generator
10
+ include PubliSci::Parser
11
+
12
# Default generator options: nulls are not encoded and resources are
# rooted at http://www.rqtl.org.
def defaults
  { encode_nulls: false, base_url: "http://www.rqtl.org" }
end
18
+
19
# Builds the Turtle description of a dataset from a fields hash.
#
# Recognised keys: :var, :title, :creator, :description, :date, :subject
# (one value or a list) and :publishers (list of hashes with :uri and :label).
# NOTE: +fields+ is modified in place: :var is sanitized, and :creator
# (from ENV['USER'] / ENV['USERNAME']) and :date (today) are filled in when
# missing.
#
# Returns the dataset statement followed by one statement per publisher.
# Raises RuntimeError when a publisher lacks :uri or :label.
def basic(fields)
  #TODO don't assume base dataset is "ns:dataset-var",
  #make it just "var", and try to make that clear to calling classes

  fields[:var] = sanitize([fields[:var]]).first
  fields[:creator] ||= ENV['USER'] || ENV['USERNAME']
  fields[:date] ||= Time.now.strftime("%Y-%m-%d")

  #TODO some of these should probably be resources, eg dct:creator, or put under DC namespace
  str = <<-EOF.unindent
    ns:dataset-#{fields[:var]} rdfs:label "#{fields[:title]}";
      dct:title "#{fields[:title]}";
      dct:creator "#{fields[:creator]}";
      rdfs:comment "#{fields[:description]}";
      dct:description "#{fields[:description]}";
      dct:issued "#{fields[:date]}"^^xsd:date;
  EOF

  end_str = ""

  if fields[:subject] && fields[:subject].size > 0
    str << " dct:subject"
    # Array() lets a single scalar subject be passed as well as a list.
    Array(fields[:subject]).each do |subject|
      sub = RDF::Resource(subject)
      sub = RDF::Literal(subject) unless sub.valid?

      str << " " + sub.to_base + ",\n"
    end
    str[-2] = ";" # last object: swap the trailing ',' for ';'
  end

  if fields[:publishers]
    fields[:publishers].each do |publisher|
      raise "No URI for publisher #{publisher}" unless publisher[:uri]
      raise "No label for publisher #{publisher}" unless publisher[:label]
      str << " dct:publisher <#{publisher[:uri]}> ;\n"
      end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n rdfs:label \"#{publisher[:label]}\" .\n\n"
    end
  end

  # Every branch above leaves ";\n" at the end of the dataset statement.
  # Close it with '.' unconditionally; previously this only happened when
  # publishers were given, leaving an unterminated statement otherwise.
  str[-2] = '.'

  str + "\n" + end_str
end
70
+
71
# Builds Turtle provenance statements for a source resource and, optionally,
# its triplified form and the chain of resources it was derived from.
#
# original   - hash describing the source; :resource is required, optional
#              keys are :software, :process (text or a path to a file holding
#              the steps), :author, :author_name, :organization,
#              :organization_name.
# triplified - nil, or a hash with :resource and the same optional
#              author/organization keys.
# chain      - nil, or a list of hashes (each with :resource plus the options
#              accepted by #activity), ordered from nearest to furthest source.
# options    - currently unused.
#
# Returns the generated Turtle as a String.
# Raises RuntimeError when +original+ or its :resource is missing.
def provenance(original, triplified, chain, options={})
  #TODO: should either add a prefixes method or replace some with full URIs
  raise "MissingOriginal: must specify a provenance source" unless original && original[:resource]

  #TODO include file type etc, or create a separate method for it

  str = <<-EOF.unindent
    <#{original[:resource]}> a prov:Entity ;
      prov:wasGeneratedBy ns:activity-1 .

    ns:activity-1 a prov:Activity ;
      prov:generated <#{original[:resource]}> .

  EOF

  if original[:software]
    original_assoc_id = Time.now.nsec.to_s(32)

    str << <<-EOF.unindent
      <#{original[:software]}> a prov:Entity.

      ns:activity-1 prov:qualifiedAssociation ns:assoc-1_#{original_assoc_id} .

      ns:assoc-1_#{original_assoc_id} a prov:Association ;
        prov:entity <#{original[:software]}> .

    EOF

    if original[:process]
      # Read the steps from disk when :process names a file; use a local so
      # the caller's hash is left untouched.
      process_text = original[:process]
      process_text = IO.read(process_text) if File.exist?(process_text)

      steps = '"' + process_text.split("\n").join('" "') + '"'
      str << <<-EOF.unindent
        ns:assoc-1_#{original_assoc_id} prov:hadPlan ns:plan-1.

        ns:plan-1 a prov:Plan ;
          rdfs:comment (#{steps}) .

      EOF
    end
  end

  if original[:author]
    str << "<#{original[:author]}> a prov:Agent, prov:Person .\n"
    str << "ns:activity-1 prov:wasAssociatedWith <#{original[:author]}> .\n"

    str << "<#{original[:author]}> foaf:givenName \"#{original[:author_name]}\" .\n" if original[:author_name]

    if original[:organization]
      str << "<#{original[:author]}> prov:actedOnBehalfOf <#{original[:organization]}> .\n\n"
      str << "<#{original[:organization]}> a prov:Agent, prov:Organization.\n"
      if original[:organization_name]
        str << "<#{original[:organization]}> foaf:name \"#{original[:organization_name]}\" .\n\n"
      else
        str << "\n"
      end
    else
      str << "\n"
    end
  end

  if triplified
    triples_assoc_id = Time.now.nsec.to_s(32)

    str << <<-EOF.unindent
      <#{triplified[:resource]}> a prov:Entity;
        prov:wasGeneratedBy ns:activity-0 .

      </ns/R2RDF> a prov:Agent, prov:SoftwareAgent ;
        rdfs:label "Semantic Publishing Toolkit" .

      ns:activity-0 a prov:Activity ;
        prov:qualifiedAssociation ns:assoc-0_#{triples_assoc_id};
        prov:generated <#{triplified[:resource]}> ;
        prov:used <#{original[:resource]}> .

      ns:assoc-0_#{triples_assoc_id} a prov:Association ;
        prov:entity </ns/R2RDF>;
        prov:hadPlan ns:plan-0.

      ns:plan-0 a prov:Plan ;
        rdfs:comment "generation of <#{triplified[:resource]}> by R2RDF gem" .

    EOF

    if triplified[:author]
      str << "<#{triplified[:author]}> a prov:Agent, prov:Person .\n"

      str << "<#{triplified[:author]}> foaf:givenName \"#{triplified[:author_name]}\" .\n" if triplified[:author_name]

      if triplified[:organization]
        str << "<#{triplified[:author]}> prov:actedOnBehalfOf <#{triplified[:organization]}> .\n\n"
        str << "<#{triplified[:organization]}> a prov:Agent, prov:Organization.\n"
        if triplified[:organization_name]
          str << "<#{triplified[:organization]}> foaf:name \"#{triplified[:organization_name]}\" .\n\n"
        else
          str << "\n"
        end
      else
        str << "\n"
      end
    end
  end

  # An empty chain carries no derivation information; skip it instead of
  # failing on chain.first.
  if chain && !chain.empty?
    str << "ns:activity-1 prov:used <#{chain.first[:resource]}> .\n"
    str << "<#{original[:resource]}> prov:wasDerivedFrom <#{chain.first[:resource]}> .\n\n"
    chain.each_with_index do |src, i|
      # Each link is derived from the next one; the last link has no source.
      following = chain[i + 1]
      str << activity(src[:resource], following && following[:resource], src)
    end
  end

  str
end
190
+
191
# Builds Turtle statements describing +entity+ and the activity that
# generated it.
#
# entity  - resource URI of the generated entity (required).
# used    - resource URI the entity was derived from, or nil.
# options - optional :software, :process (text or a path to a file holding
#           the steps; only used together with :software), :author,
#           :author_name, :organization, :organization_name.
#
# Identifiers for the activity, association and plan are taken from the
# current time's nanosecond field.
#
# Returns the entity statements followed by the activity statements.
# Raises RuntimeError when +entity+ is nil.
def activity(entity, used, options={})
  raise "NoEntityGiven: activity generation requires a subject entity" unless entity

  assoc_id = Time.now.nsec.to_s(32)
  activity_id = Time.now.nsec.to_s(32)
  plan_id = Time.now.nsec.to_s(32)

  entity_str = <<-EOF.unindent
    <#{entity}> a prov:Entity ;
      prov:wasGeneratedBy ns:activity-a_#{activity_id} ;
  EOF

  activity_str = <<-EOF.unindent
    ns:activity-a_#{activity_id} a prov:Activity ;
      prov:generated <#{entity}> ;
  EOF

  if used
    entity_str << "\tprov:wasDerivedFrom <#{used}> . \n\n"
    activity_str << "\tprov:used <#{used}> . \n\n"
  else
    # Nothing more to add: turn the trailing ';' into the closing '.'.
    entity_str[-2] = ".\n"
    activity_str[-2] = ".\n"
  end

  activity_str << <<-EOF.unindent
    ns:activity-a_#{activity_id} prov:qualifiedAssociation ns:assoc-s_#{assoc_id} .

    ns:assoc-s_#{assoc_id} a prov:Association .

  EOF

  if options[:software]
    activity_str << <<-EOF.unindent
      <#{options[:software]}> a prov:Entity .

      ns:assoc-s_#{assoc_id} prov:agent <#{options[:software]}> .
    EOF

    if options[:process]
      # Read the steps from disk when :process names a file; use a local so
      # the caller's hash is left untouched.
      process_text = options[:process]
      process_text = IO.read(process_text) if File.exist?(process_text)

      steps = '"' + process_text.split("\n").join('" "') + '"'
      activity_str << <<-EOF.unindent
        ns:assoc-s_#{assoc_id} prov:hadPlan ns:plan-p_#{plan_id}.

        ns:plan-p_#{plan_id} a prov:Plan ;
          rdfs:comment (#{steps}) .
      EOF
    end
  end

  if options[:author]
    entity_str << "<#{options[:author]}> a prov:Agent, prov:Person .\n"
    entity_str << "<#{options[:author]}> foaf:givenName \"#{options[:author_name]}\" .\n" if options[:author_name]

    activity_str << "ns:activity-a_#{activity_id} prov:wasAssociatedWith <#{options[:author]}> .\n"
    activity_str << "ns:assoc-s_#{assoc_id} prov:agent <#{options[:author]}> .\n"

    if options[:organization]
      entity_str << "<#{options[:organization]}> a prov:Agent, prov:Organization .\n"
      activity_str << "<#{options[:author]}> prov:actedOnBehalfOf <#{options[:organization]}> .\n\n"
      if options[:organization_name]
        entity_str << "<#{options[:organization]}> foaf:name \"#{options[:organization_name]}\" .\n\n"
      end
    else
      activity_str << "\n"
    end
  end

  entity_str + "\n" + activity_str
end
265
+
266
# Builds Turtle statements for an activity carried out by a piece of software
# following a list of steps.
#
# id                - suffix used for the activity, association and plan names.
# step_string       - newline-separated steps; each line becomes one list item.
# software_resource - URI of the software associated with the activity.
# software_var      - dataset variable name used in the prov:used resource.
# options           - currently unused.
#
# Returns the generated Turtle as a String.
def process(id, step_string, software_resource, software_var, options={})
  #TODO a better predicate for the steplist than rdfs:comment
  # and make sure it looks good.
  steps = '"' + step_string.split("\n").join('" "') + '"'
  assoc_id = Time.now.nsec.to_s(32)

  # The activity must point at the same association name that is described
  # below (ns:assoc-<id>_<assoc_id>).
  <<-EOF.unindent
    ns:activity-#{id} a prov:Activity ;
      prov:qualifiedAssociation ns:assoc-#{id}_#{assoc_id} ;
      prov:used </ns/dataset/#{software_var}#var>.

    ns:assoc-#{id}_#{assoc_id} a prov:Association ;
      prov:entity <#{software_resource}>;
      prov:hadPlan ns:plan-#{id}.

    ns:plan-#{id} a prov:Plan ;
      rdfs:comment (#{steps}) .

  EOF
end
286
+
287
# Returns a Turtle statement describing the R2RDF toolkit as a foaf:Agent
# that is a member of SciRuby.
#
# The resource is rooted at defaults[:base_url]. The previous version read
# an undefined `options` and passed the heredoc to an undefined `str`
# method, so it always raised.
def r2rdf_metadata
  <<-EOF.unindent
    <#{defaults[:base_url]}/ns/R2RDF> a foaf:Agent;
      foaf:name "R2RDF Semantic Web Toolkit";
      org:memberOf <http://sciruby.com/> .
  EOF
end
294
+
295
# Returns a Turtle statement describing the SciRuby organization.
#
# The previous version passed the heredoc to an undefined `str` method and
# therefore always raised. The description uses rdfs:comment, the predicate
# used for descriptions elsewhere in this generator.
def org_metadata
  <<-EOF.unindent
    <http://sciruby.com/> a org:Organization, prov:Organization;
      skos:prefLabel "SciRuby";
      rdfs:comment "A Project to Build and Improve Tools for Scientific Computing in Ruby".
  EOF
end
302
+
303
# Prints help for the metadata fields to standard output. With a +topic+ it
# prints a placeholder message for that topic; without one it prints the
# table of available fields. Returns nil.
def metadata_help(topic=nil)
  return puts("This should display help information for #{topic}, but there's none here yet :(") if topic

  overview = <<-EOF.unindent
    Available metadata fields:
    (Field) (Ontology) (Description)

    publishers dct/foaf/org The Organization/s responsible for publishing the dataset
    subject dct The subject of this dataset. Use resources when possible
    var dct The name of the datset resource (used internally)
    creator dct The person or process responsible for creating the dataset
    description dct/rdfs A descriptions of the dataset
    issued dct The date of issuance for the dataset

  EOF
  puts overview
end
321
+ end
322
+ end
323
+ end