bio-publisci 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,316 +1,5 @@
1
- class String
2
- def unindent
3
- gsub /^#{self[/\A\s*/]}/, ''
4
- end
5
- end
6
-
7
- module R2RDF
8
- module Metadata
9
- include R2RDF::Parser
10
-
11
- def defaults
12
- {
13
- encode_nulls: false,
14
- base_url: "http://www.rqtl.org",
15
- }
16
- end
17
-
18
- def basic(fields)
19
- #TODO don't assume base dataset is "ns:dataset-var",
20
- #make it just "var", and try to make that clear to calling classes
21
-
22
- fields[:var] = sanitize([fields[:var]]).first
23
-
24
- unless fields[:creator]
25
- if ENV['USER']
26
- fields[:creator] = ENV['USER']
27
- elsif ENV['USERNAME']
28
- fields[:creator] = ENV['USERNAME']
29
- end
30
- end
31
-
32
- fields[:date] = Time.now.strftime("%Y-%m-%d") unless fields[:date]
33
-
34
- #TODO some of these should probably be resources, eg dct:creator, or put under DC namespace
35
- str = <<-EOF.unindent
36
- ns:dataset-#{fields[:var]} rdfs:label "#{fields[:title]}";
37
- dct:title "#{fields[:title]}";
38
- dct:creator "#{fields[:creator]}";
39
- rdfs:comment "#{fields[:description]}";
40
- dct:description "#{fields[:description]}";
41
- dct:issued "#{fields[:date]}"^^xsd:date;
42
- EOF
43
-
44
- end_str = ""
45
-
46
- if fields[:subject] && fields[:subject].size > 0
47
- str << "\tdct:subject \n"
48
- fields[:subject].each{|subject| str << "\t\t" + subject + ",\n" }
49
- str[-2] = ";"
50
- end
51
-
52
- if fields[:publishers]
53
- fields[:publishers].map{|publisher|
54
- raise "No URI for publisher #{publisher}" unless publisher[:uri]
55
- raise "No label for publisher #{publisher}" unless publisher[:label]
56
- str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
57
- end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
58
- }
59
- str[-2] = '.'
60
- end
61
-
62
- str + "\n" + end_str
63
- end
64
-
65
- def provenance(original, triplified, chain, options={})
66
- #TODO: should either add a prefixes method or replace some with full URIs
67
- raise "MissingOriginal: must specify a provenance source" unless original && original[:resource]
68
-
69
- #TODO include file type etc, or create a separate method for it
70
-
71
- str = <<-EOF.unindent
72
- <#{original[:resource]}> a prov:Entity ;
73
- prov:wasGeneratredBy ns:activity-1 .
74
-
75
- ns:activity-1 a prov:Activity ;
76
- prov:generated <#{original[:resource]}> .
77
-
78
- EOF
79
-
80
- if original[:software]
81
- original_assoc_id = Time.now.nsec.to_s(32)
82
-
83
-
84
- str << <<-EOF.unindent
85
- <#{original[:software]}> a prov:Entity.
86
-
87
- ns:activity-1 prov:qualifiedAssociation ns:assoc-1_#{original_assoc_id} .
88
-
89
- ns:assoc-1_#{original_assoc_id} a prov:Assocation ;
90
- prov:entity <#{original[:software]}> .
91
-
92
- EOF
93
-
94
- if original[:process]
95
- original[:process] = IO.read(original[:process]) if File.exist? original[:process]
96
-
97
- steps = '"' + original[:process].split("\n").join('" "') + '"'
98
- str << <<-EOF.unindent
99
- ns:assoc-1_#{original_assoc_id} prov:hadPlan ns:plan-1.
100
-
101
- ns:plan-1 a prov:Plan ;
102
- rdfs:comment (#{steps});
103
-
104
- EOF
105
- end
106
- end
107
-
108
- if original[:author]
109
- str << "<#{original[:author]}> a prov:Agent, prov:Person .\n"
110
- str << "ns:activity-1 prov:wasAssociatedWith <#{original[:author]}> .\n"
111
-
112
- str << "<#{original[:author]}> foaf:givenName \"#{original[:author_name]}\" .\n" if original[:author_name]
113
-
114
- if original[:organization]
115
- str << "<#{original[:author]}> prov:actedOnBehalfOf <#{original[:organization]}> .\n\n"
116
- str << "<#{original[:organization]}> a prov:Agent, prov:Organization.\n"
117
- if original[:organization_name]
118
- str << "<#{original[:organization]}> foaf:name \"#{original[:organization_name]}\" .\n\n"
119
- else
120
- str << "\n"
121
- end
122
- else
123
- str << "\n"
124
- end
125
- end
126
-
127
- if triplified
128
- triples_assoc_id = Time.now.nsec.to_s(32)
129
-
130
- str << <<-EOF.unindent
131
- <#{triplified[:resource]}> a prov:Entity;
132
- prov:wasGeneratredBy ns:activity-0 .
133
-
134
- </ns/R2RDF> a prov:Agent, prov:SoftwareAgent ;
135
- rdfs:label "Semantic Publishing Toolkit" .
136
-
137
- ns:activity-0 a prov:Activity ;
138
- prov:qualifiedAssociation ns:assoc-0_#{triples_assoc_id};
139
- prov:generated <#{triplified[:resource]}> ;
140
- prov:used <#{original[:resource]}> .
141
-
142
- ns:assoc-0_#{triples_assoc_id} a prov:Assocation ;
143
- prov:entity </ns/R2RDF>;
144
- prov:hadPlan ns:plan-0.
145
-
146
- ns:plan-0 a prov:Plan ;
147
- rdfs:comment "generation of <#{triplified[:resource]}> by R2RDF gem" .
148
-
149
- EOF
150
-
151
- if triplified[:author]
152
- str << "<#{triplified[:author]}> a prov:Agent, prov:Person .\n"
153
-
154
- str << "<#{triplified[:author]}> foaf:givenName \"#{triplified[:author_name]}\" .\n" if triplified[:author_name]
155
-
156
- if triplified[:organization]
157
- str << "<#{triplified[:author]}> prov:actedOnBehalfOf <#{triplified[:organization]}> .\n\n"
158
- str << "<#{triplified[:organization]}> a prov:Agent, prov:Organization.\n"
159
- if triplified[:organization_name]
160
- str << "<#{triplified[:organization]}> foaf:name \"#{triplified[:organization_name]}\" .\n\n"
161
- else
162
- str << "\n"
163
- end
164
- else
165
- str << "\n"
166
- end
167
- end
168
- end
169
-
170
- if chain
171
- str << "ns:activity-1 prov:used <#{chain.first[:resource]}> .\n"
172
- str << "<#{original[:resource]}> prov:wasDerivedFrom <#{chain.first[:resource]}> .\n\n"
173
- chain.each_with_index{ |src,i|
174
- if i == chain.size-1
175
- str << activity(src[:resource],nil,src)
176
- else
177
- str << activity(src[:resource],chain[i+1][:resource],src)
178
- end
179
- }
180
- end
181
-
182
- str
183
- end
184
-
185
- def activity(entity, used, options={})
186
- assoc_id = Time.now.nsec.to_s(32)
187
- activity_id = Time.now.nsec.to_s(32)
188
- plan_id = Time.now.nsec.to_s(32)
189
-
190
- raise "NoEntityGiven: activity generation requires a subject entity" unless entity
191
-
192
- entity_str = <<-EOF.unindent
193
- <#{entity}> a prov:Entity ;
194
- prov:wasGeneratredBy ns:activity-a_#{activity_id} ;
195
- EOF
196
-
197
- activity_str = <<-EOF.unindent
198
- ns:activity-a_#{activity_id} a prov:Activity ;
199
- prov:generated <#{entity}> ;
200
- EOF
201
-
202
- if used
203
- entity_str << "\tprov:wasDerivedFrom <#{used}> . \n\n"
204
- activity_str << "\tprov:used <#{used}> . \n\n"
205
- else
206
- entity_str[-2] = ".\n"
207
- activity_str[-2] = ".\n"
208
- end
209
-
210
- activity_str << <<-EOF.unindent
211
- ns:activity-a_#{activity_id} prov:qualifiedAssociation ns:assoc-s_#{assoc_id} .
212
-
213
- ns:assoc-s_#{assoc_id} a prov:Assocation .
214
-
215
- EOF
216
-
217
- if options[:software]
218
-
219
- activity_str << <<-EOF.unindent
220
- <#{options[:software]}> a prov:Entity .
221
-
222
- ns:assoc-s_#{assoc_id} prov:agent <#{options[:software]}> .
223
- EOF
224
-
225
- if options[:process]
226
- options[:process] = IO.read(options[:process]) if File.exist? options[:process]
227
-
228
- steps = '"' + options[:process].split("\n").join('" "') + '"'
229
- activity_str << <<-EOF.unindent
230
- ns:assoc-s_#{assoc_id} prov:hadPlan ns:plan-p_#{plan_id}.
231
-
232
- ns:plan-p_#{plan_id} a prov:Plan ;
233
- rdfs:comment (#{steps});
234
- EOF
235
- end
236
- end
237
-
238
- if options[:author]
239
- entity_str << "<#{options[:author]}> a prov:Agent, prov:Person .\n"
240
- entity_str << "<#{options[:author]}> foaf:givenName \"#{options[:author_name]}\" .\n" if options[:author_name]
241
-
242
- activity_str << "ns:activity-a_#{activity_id} prov:wasAssociatedWith <#{options[:author]}> .\n"
243
- activity_str << "ns:assoc-s_#{assoc_id} prov:agent <#{options[:author]}> .\n"
244
-
245
- if options[:organization]
246
- entity_str << "<#{options[:organization]}> a prov:Agent, prov:Organization .\n"
247
- activity_str << "<#{options[:author]}> prov:actedOnBehalfOf <#{options[:organization]}> .\n\n"
248
- if options[:organization_name]
249
- entity_str << "<#{options[:organization]}> foaf:name \"#{options[:organization_name]}\" .\n\n"
250
- end
251
- else
252
- activity_str << "\n"
253
- # entity_str << "\n"
254
- end
255
- end
256
-
257
- entity_str + "\n" + activity_str
258
- end
259
-
260
- def process(id, step_string, software_resource, software_var, options={})
261
- #TODO a better predicate for the steplist than rdfs:comment
262
- # and make sure it looks good.
263
- steps = '"' + step_string.split("\n").join('" "') + '"'
264
- assoc_id = Time.now.nsec.to_s(32)
265
- str = <<-EOF.unindent
266
- ns:activity-#{id} a prov:Activity ;
267
- prov:qualifiedAssociation ns:assoc-#{assoc_id} ;
268
- prov:used </ns/dataset/#{software_var}#var>.
269
-
270
- ns:assoc-#{id}_#{assoc_id} a prov:Assocation ;
271
- prov:entity <#{software_resource}>;
272
- prov:hadPlan ns:plan-#{id}.
273
-
274
- ns:plan-#{id} a prov:Plan ;
275
- rdfs:comment (#{steps});
276
-
277
- EOF
278
-
279
- end
280
-
281
- def r2rdf_metadata
282
- str <<-EOF.unindent
283
- <#{options[:base_url]}/ns/R2RDF> a foaf:Agent;
284
- foaf:name "R2RDF Semantic Web Toolkit";
285
- org:memberOf <http://sciruby.com/>
286
- EOF
287
- end
288
-
289
- def org_metadata
290
- str <<-EOF.unindent
291
- <http://sciruby.com/> a org:Organization, prov:Organization;
292
- skos:prefLabel "SciRuby";
293
- rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".
294
- EOF
295
- end
296
-
297
- def metadata_help(topic=nil)
298
- if topic
299
- puts "This should display help information for #{topic}, but there's none here yet :("
300
- else
301
- puts <<-EOF.unindent
302
- Available metadata fields:
303
- (Field) (Ontology) (Description)
304
-
305
- publishers dct/foaf/org The Organization/s responsible for publishing the dataset
306
- subject dct The subject of this dataset. Use resources when possible
307
- var dct The name of the datset resource (used internally)
308
- creator dct The person or process responsible for creating the dataset
309
- description dct/rdfs A descriptions of the dataset
310
- issued dct The date of issuance for the dataset
311
-
312
- EOF
313
- end
314
- end
1
+ module PubliSci
2
+ class Metadata
3
+ extend PubliSci::Registry
315
4
  end
316
5
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Activity
4
4
  include Prov::Element
5
5
  class Associations < Array
@@ -26,6 +26,8 @@ module Prov
26
26
  def generated(entity=nil)
27
27
  if entity.is_a? Entity
28
28
  entity.generated_by self
29
+ elsif Prov.entities[entity]
30
+ Prov.entities[entity].generated_by self
29
31
  end
30
32
  basic_list(:generated,:entities,Generations,entity)
31
33
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Agent
4
4
  include Prov::Element
5
5
  attr_accessor :organization
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Association
4
4
  include Prov::Element
5
5
 
@@ -95,7 +95,7 @@ module Prov
95
95
  str << "\tprov:agent <#{agent}> ;\n"
96
96
  str << "\tprov:hadPlan <#{plan}> ;\n" if plan
97
97
  str << "\tprov:hadRole <#{role}> ;\n" if role
98
- str[-2] = ".\n"
98
+ str << "\trdfs:label \"#{__label}\".\n\n"
99
99
  str
100
100
  end
101
101
 
@@ -0,0 +1,34 @@
1
+ module PubliSci
2
+ class Prov
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ output: :generate_n3,
7
+ abbreviate: false,
8
+ repository: :in_memory,
9
+ repository_url: 'http://localhost:8080/'
10
+ }
11
+ end
12
+
13
+ defaults.keys.each{|k|
14
+ default = defaults[k]
15
+ define_method(k) do |input=nil|
16
+ var = instance_variable_get :"@#{k}"
17
+ if var
18
+ var
19
+ else
20
+ instance_variable_set :"@#{k}", default
21
+ end
22
+
23
+ if input
24
+ instance_variable_set :"@#{k}", input
25
+ end
26
+
27
+ instance_variable_get :"@#{k}"
28
+ end
29
+
30
+ attr_writer k
31
+ }
32
+ end
33
+ end
34
+ end
@@ -1,9 +1,14 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Derivation
4
4
 
5
5
  include PubliSci::CustomPredicate
6
6
 
7
+ def __label
8
+ # raise "MissingInternalLabel: no __label for #{self.inspect}" unless @__label
9
+ @__label ||= Time.now.nsec.to_s(32)
10
+ end
11
+
7
12
  def subject(sub=nil)
8
13
  if sub
9
14
  @subject = sub
@@ -40,10 +45,10 @@ module PubliSci
40
45
  str = "<#{subject}> a prov:Derivation ;\n"
41
46
  str << "\tprov:entity <#{entity}> ;\n" if entity
42
47
  str << "\tprov:hadActivity <#{had_activity}> ;\n" if had_activity
48
+ str << "\trdfs:label \"#{__label}\".\n\n"
43
49
 
44
50
  add_custom(str)
45
51
 
46
- str[-2] = ".\n"
47
52
  str
48
53
  end
49
54
 
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  module Element
4
4
  include PubliSci::Vocabulary
5
5
  include PubliSci::CustomPredicate
@@ -103,7 +103,7 @@ module PubliSci
103
103
  instance_variable_set("@#{var}",collection_class.new)
104
104
  end
105
105
  instance_variable_get("@#{var}") << inst
106
- Prov.register(type,inst)
106
+ Prov.register(inst.__label,inst)
107
107
  else
108
108
  if name
109
109
  unless instance_variable_get("@#{var}")
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Entity
4
4
  include Prov::Element
5
5
 
@@ -28,31 +28,10 @@ module PubliSci
28
28
 
29
29
  def attributed_to(agent=nil)
30
30
  basic_keyword(:attributed_to,:agents,agent)
31
- # if agent
32
- # @attributed_to = agent
33
- # elsif @attributed_to.is_a? Symbol
34
- # raise "UnknownAgent: #{@attributed_to}" unless Prov.agents[@attributed_to]
35
- # @attributed_to = Prov.agents[@attributed_to]
36
- # else
37
- # @attributed_to
38
- # end
39
31
  end
40
32
 
41
33
  def derived_from(entity=nil,&block)
42
34
  block_list(:derived_from,:derivations,Derivation,Derivations,entity,&block)
43
- # if block_given?
44
- # deriv = Derivation.new
45
- # deriv.instance_eval(&block)
46
- # (@derived_from ||= Derivations.new) << deriv
47
- # Prov.register(nil,deriv)
48
- # else
49
- # if entity
50
-
51
- # (@derived_from ||= Derivations.new) << entity
52
- # else
53
- # @derived_from
54
- # end
55
- # end
56
35
  end
57
36
 
58
37
  def to_n3