bio-publisci 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +1 -1
  4. data/examples/prov_dsl.prov +2 -1
  5. data/examples/safe_gen.rb +7 -0
  6. data/examples/visualization/primer.prov +66 -0
  7. data/examples/visualization/prov_viz.rb +140 -0
  8. data/examples/visualization/viz.rb +35 -0
  9. data/features/metadata_steps.rb +2 -4
  10. data/features/orm_steps.rb +4 -4
  11. data/features/reader_steps.rb +1 -1
  12. data/features/store_steps.rb +1 -1
  13. data/features/writer.feature +1 -1
  14. data/features/writer_steps.rb +1 -1
  15. data/lib/bio-publisci.rb +10 -2
  16. data/lib/bio-publisci/analyzer.rb +4 -4
  17. data/lib/bio-publisci/{spira.rb → datacube_model.rb} +4 -5
  18. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +12 -12
  19. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
  20. data/lib/bio-publisci/dataset/configuration.rb +31 -0
  21. data/lib/bio-publisci/dataset/data_cube.rb +28 -17
  22. data/lib/bio-publisci/dataset/dataset.rb +11 -0
  23. data/lib/bio-publisci/dataset/dataset_for.rb +19 -9
  24. data/lib/bio-publisci/dataset/interactive.rb +1 -1
  25. data/lib/bio-publisci/dsl/config.rb +34 -0
  26. data/lib/bio-publisci/dsl/dataset_dsl.rb +91 -0
  27. data/lib/bio-publisci/dsl/dsl.rb +69 -0
  28. data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
  29. data/lib/bio-publisci/{metadata/prov/dsl.rb → dsl/prov_dsl.rb} +30 -6
  30. data/lib/bio-publisci/metadata/generator.rb +323 -0
  31. data/lib/bio-publisci/metadata/metadata.rb +3 -314
  32. data/lib/bio-publisci/metadata/prov/activity.rb +3 -1
  33. data/lib/bio-publisci/metadata/prov/agent.rb +1 -1
  34. data/lib/bio-publisci/metadata/prov/association.rb +2 -2
  35. data/lib/bio-publisci/metadata/prov/config.rb +34 -0
  36. data/lib/bio-publisci/metadata/prov/derivation.rb +7 -2
  37. data/lib/bio-publisci/metadata/prov/element.rb +2 -2
  38. data/lib/bio-publisci/metadata/prov/entity.rb +1 -22
  39. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +8 -9
  40. data/lib/bio-publisci/metadata/prov/plan.rb +1 -1
  41. data/lib/bio-publisci/metadata/prov/prov.rb +23 -21
  42. data/lib/bio-publisci/metadata/prov/role.rb +1 -1
  43. data/lib/bio-publisci/metadata/prov/usage.rb +1 -1
  44. data/lib/bio-publisci/metadata/publisher.rb +25 -0
  45. data/lib/bio-publisci/mixins/dereferencable.rb +1 -1
  46. data/lib/bio-publisci/mixins/registry.rb +27 -0
  47. data/lib/bio-publisci/output.rb +1 -1
  48. data/lib/bio-publisci/parser.rb +1 -1
  49. data/lib/bio-publisci/query/query_helper.rb +14 -14
  50. data/lib/bio-publisci/r_client.rb +5 -5
  51. data/lib/bio-publisci/readers/arff.rb +5 -5
  52. data/lib/bio-publisci/readers/csv.rb +3 -3
  53. data/lib/bio-publisci/readers/dataframe.rb +3 -3
  54. data/lib/bio-publisci/readers/r_cross.rb +4 -4
  55. data/lib/bio-publisci/readers/r_matrix.rb +3 -3
  56. data/lib/bio-publisci/store.rb +3 -3
  57. data/lib/bio-publisci/writers/arff.rb +6 -6
  58. data/lib/bio-publisci/writers/dataframe.rb +5 -5
  59. data/scripts/islet_mlratio.rb +1 -1
  60. data/scripts/scan_islet.rb +1 -1
  61. data/scripts/update_reference.rb +2 -2
  62. data/spec/ORM/data_cube_orm_spec.rb +2 -2
  63. data/spec/ORM/prov_model_spec.rb +19 -0
  64. data/spec/analyzer_spec.rb +7 -7
  65. data/spec/data_cube_spec.rb +13 -13
  66. data/spec/dataset_for_spec.rb +11 -4
  67. data/spec/dsl_spec.rb +90 -0
  68. data/spec/generators/csv_spec.rb +4 -4
  69. data/spec/generators/dataframe_spec.rb +6 -6
  70. data/spec/generators/r_cross_spec.rb +2 -2
  71. data/spec/generators/r_matrix_spec.rb +2 -2
  72. data/spec/metadata/metadata_dsl_spec.rb +68 -0
  73. data/spec/prov/activity_spec.rb +4 -4
  74. data/spec/prov/agent_spec.rb +3 -4
  75. data/spec/prov/association_spec.rb +1 -2
  76. data/spec/prov/config_spec.rb +28 -0
  77. data/spec/prov/derivation_spec.rb +30 -0
  78. data/spec/prov/entity_spec.rb +3 -4
  79. data/spec/prov/role_spec.rb +1 -2
  80. data/spec/prov/usage_spec.rb +1 -2
  81. data/spec/r_builder_spec.rb +3 -3
  82. data/spec/turtle/bacon +20 -4
  83. data/spec/turtle/reference +20 -4
  84. metadata +37 -4
@@ -1,316 +1,5 @@
1
- class String
2
- def unindent
3
- gsub /^#{self[/\A\s*/]}/, ''
4
- end
5
- end
6
-
7
- module R2RDF
8
- module Metadata
9
- include R2RDF::Parser
10
-
11
- def defaults
12
- {
13
- encode_nulls: false,
14
- base_url: "http://www.rqtl.org",
15
- }
16
- end
17
-
18
- def basic(fields)
19
- #TODO don't assume base dataset is "ns:dataset-var",
20
- #make it just "var", and try to make that clear to calling classes
21
-
22
- fields[:var] = sanitize([fields[:var]]).first
23
-
24
- unless fields[:creator]
25
- if ENV['USER']
26
- fields[:creator] = ENV['USER']
27
- elsif ENV['USERNAME']
28
- fields[:creator] = ENV['USERNAME']
29
- end
30
- end
31
-
32
- fields[:date] = Time.now.strftime("%Y-%m-%d") unless fields[:date]
33
-
34
- #TODO some of these should probably be resources, eg dct:creator, or put under DC namespace
35
- str = <<-EOF.unindent
36
- ns:dataset-#{fields[:var]} rdfs:label "#{fields[:title]}";
37
- dct:title "#{fields[:title]}";
38
- dct:creator "#{fields[:creator]}";
39
- rdfs:comment "#{fields[:description]}";
40
- dct:description "#{fields[:description]}";
41
- dct:issued "#{fields[:date]}"^^xsd:date;
42
- EOF
43
-
44
- end_str = ""
45
-
46
- if fields[:subject] && fields[:subject].size > 0
47
- str << "\tdct:subject \n"
48
- fields[:subject].each{|subject| str << "\t\t" + subject + ",\n" }
49
- str[-2] = ";"
50
- end
51
-
52
- if fields[:publishers]
53
- fields[:publishers].map{|publisher|
54
- raise "No URI for publisher #{publisher}" unless publisher[:uri]
55
- raise "No label for publisher #{publisher}" unless publisher[:label]
56
- str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
57
- end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
58
- }
59
- str[-2] = '.'
60
- end
61
-
62
- str + "\n" + end_str
63
- end
64
-
65
- def provenance(original, triplified, chain, options={})
66
- #TODO: should either add a prefixes method or replace some with full URIs
67
- raise "MissingOriginal: must specify a provenance source" unless original && original[:resource]
68
-
69
- #TODO include file type etc, or create a separate method for it
70
-
71
- str = <<-EOF.unindent
72
- <#{original[:resource]}> a prov:Entity ;
73
- prov:wasGeneratredBy ns:activity-1 .
74
-
75
- ns:activity-1 a prov:Activity ;
76
- prov:generated <#{original[:resource]}> .
77
-
78
- EOF
79
-
80
- if original[:software]
81
- original_assoc_id = Time.now.nsec.to_s(32)
82
-
83
-
84
- str << <<-EOF.unindent
85
- <#{original[:software]}> a prov:Entity.
86
-
87
- ns:activity-1 prov:qualifiedAssociation ns:assoc-1_#{original_assoc_id} .
88
-
89
- ns:assoc-1_#{original_assoc_id} a prov:Assocation ;
90
- prov:entity <#{original[:software]}> .
91
-
92
- EOF
93
-
94
- if original[:process]
95
- original[:process] = IO.read(original[:process]) if File.exist? original[:process]
96
-
97
- steps = '"' + original[:process].split("\n").join('" "') + '"'
98
- str << <<-EOF.unindent
99
- ns:assoc-1_#{original_assoc_id} prov:hadPlan ns:plan-1.
100
-
101
- ns:plan-1 a prov:Plan ;
102
- rdfs:comment (#{steps});
103
-
104
- EOF
105
- end
106
- end
107
-
108
- if original[:author]
109
- str << "<#{original[:author]}> a prov:Agent, prov:Person .\n"
110
- str << "ns:activity-1 prov:wasAssociatedWith <#{original[:author]}> .\n"
111
-
112
- str << "<#{original[:author]}> foaf:givenName \"#{original[:author_name]}\" .\n" if original[:author_name]
113
-
114
- if original[:organization]
115
- str << "<#{original[:author]}> prov:actedOnBehalfOf <#{original[:organization]}> .\n\n"
116
- str << "<#{original[:organization]}> a prov:Agent, prov:Organization.\n"
117
- if original[:organization_name]
118
- str << "<#{original[:organization]}> foaf:name \"#{original[:organization_name]}\" .\n\n"
119
- else
120
- str << "\n"
121
- end
122
- else
123
- str << "\n"
124
- end
125
- end
126
-
127
- if triplified
128
- triples_assoc_id = Time.now.nsec.to_s(32)
129
-
130
- str << <<-EOF.unindent
131
- <#{triplified[:resource]}> a prov:Entity;
132
- prov:wasGeneratredBy ns:activity-0 .
133
-
134
- </ns/R2RDF> a prov:Agent, prov:SoftwareAgent ;
135
- rdfs:label "Semantic Publishing Toolkit" .
136
-
137
- ns:activity-0 a prov:Activity ;
138
- prov:qualifiedAssociation ns:assoc-0_#{triples_assoc_id};
139
- prov:generated <#{triplified[:resource]}> ;
140
- prov:used <#{original[:resource]}> .
141
-
142
- ns:assoc-0_#{triples_assoc_id} a prov:Assocation ;
143
- prov:entity </ns/R2RDF>;
144
- prov:hadPlan ns:plan-0.
145
-
146
- ns:plan-0 a prov:Plan ;
147
- rdfs:comment "generation of <#{triplified[:resource]}> by R2RDF gem" .
148
-
149
- EOF
150
-
151
- if triplified[:author]
152
- str << "<#{triplified[:author]}> a prov:Agent, prov:Person .\n"
153
-
154
- str << "<#{triplified[:author]}> foaf:givenName \"#{triplified[:author_name]}\" .\n" if triplified[:author_name]
155
-
156
- if triplified[:organization]
157
- str << "<#{triplified[:author]}> prov:actedOnBehalfOf <#{triplified[:organization]}> .\n\n"
158
- str << "<#{triplified[:organization]}> a prov:Agent, prov:Organization.\n"
159
- if triplified[:organization_name]
160
- str << "<#{triplified[:organization]}> foaf:name \"#{triplified[:organization_name]}\" .\n\n"
161
- else
162
- str << "\n"
163
- end
164
- else
165
- str << "\n"
166
- end
167
- end
168
- end
169
-
170
- if chain
171
- str << "ns:activity-1 prov:used <#{chain.first[:resource]}> .\n"
172
- str << "<#{original[:resource]}> prov:wasDerivedFrom <#{chain.first[:resource]}> .\n\n"
173
- chain.each_with_index{ |src,i|
174
- if i == chain.size-1
175
- str << activity(src[:resource],nil,src)
176
- else
177
- str << activity(src[:resource],chain[i+1][:resource],src)
178
- end
179
- }
180
- end
181
-
182
- str
183
- end
184
-
185
- def activity(entity, used, options={})
186
- assoc_id = Time.now.nsec.to_s(32)
187
- activity_id = Time.now.nsec.to_s(32)
188
- plan_id = Time.now.nsec.to_s(32)
189
-
190
- raise "NoEntityGiven: activity generation requires a subject entity" unless entity
191
-
192
- entity_str = <<-EOF.unindent
193
- <#{entity}> a prov:Entity ;
194
- prov:wasGeneratredBy ns:activity-a_#{activity_id} ;
195
- EOF
196
-
197
- activity_str = <<-EOF.unindent
198
- ns:activity-a_#{activity_id} a prov:Activity ;
199
- prov:generated <#{entity}> ;
200
- EOF
201
-
202
- if used
203
- entity_str << "\tprov:wasDerivedFrom <#{used}> . \n\n"
204
- activity_str << "\tprov:used <#{used}> . \n\n"
205
- else
206
- entity_str[-2] = ".\n"
207
- activity_str[-2] = ".\n"
208
- end
209
-
210
- activity_str << <<-EOF.unindent
211
- ns:activity-a_#{activity_id} prov:qualifiedAssociation ns:assoc-s_#{assoc_id} .
212
-
213
- ns:assoc-s_#{assoc_id} a prov:Assocation .
214
-
215
- EOF
216
-
217
- if options[:software]
218
-
219
- activity_str << <<-EOF.unindent
220
- <#{options[:software]}> a prov:Entity .
221
-
222
- ns:assoc-s_#{assoc_id} prov:agent <#{options[:software]}> .
223
- EOF
224
-
225
- if options[:process]
226
- options[:process] = IO.read(options[:process]) if File.exist? options[:process]
227
-
228
- steps = '"' + options[:process].split("\n").join('" "') + '"'
229
- activity_str << <<-EOF.unindent
230
- ns:assoc-s_#{assoc_id} prov:hadPlan ns:plan-p_#{plan_id}.
231
-
232
- ns:plan-p_#{plan_id} a prov:Plan ;
233
- rdfs:comment (#{steps});
234
- EOF
235
- end
236
- end
237
-
238
- if options[:author]
239
- entity_str << "<#{options[:author]}> a prov:Agent, prov:Person .\n"
240
- entity_str << "<#{options[:author]}> foaf:givenName \"#{options[:author_name]}\" .\n" if options[:author_name]
241
-
242
- activity_str << "ns:activity-a_#{activity_id} prov:wasAssociatedWith <#{options[:author]}> .\n"
243
- activity_str << "ns:assoc-s_#{assoc_id} prov:agent <#{options[:author]}> .\n"
244
-
245
- if options[:organization]
246
- entity_str << "<#{options[:organization]}> a prov:Agent, prov:Organization .\n"
247
- activity_str << "<#{options[:author]}> prov:actedOnBehalfOf <#{options[:organization]}> .\n\n"
248
- if options[:organization_name]
249
- entity_str << "<#{options[:organization]}> foaf:name \"#{options[:organization_name]}\" .\n\n"
250
- end
251
- else
252
- activity_str << "\n"
253
- # entity_str << "\n"
254
- end
255
- end
256
-
257
- entity_str + "\n" + activity_str
258
- end
259
-
260
- def process(id, step_string, software_resource, software_var, options={})
261
- #TODO a better predicate for the steplist than rdfs:comment
262
- # and make sure it looks good.
263
- steps = '"' + step_string.split("\n").join('" "') + '"'
264
- assoc_id = Time.now.nsec.to_s(32)
265
- str = <<-EOF.unindent
266
- ns:activity-#{id} a prov:Activity ;
267
- prov:qualifiedAssociation ns:assoc-#{assoc_id} ;
268
- prov:used </ns/dataset/#{software_var}#var>.
269
-
270
- ns:assoc-#{id}_#{assoc_id} a prov:Assocation ;
271
- prov:entity <#{software_resource}>;
272
- prov:hadPlan ns:plan-#{id}.
273
-
274
- ns:plan-#{id} a prov:Plan ;
275
- rdfs:comment (#{steps});
276
-
277
- EOF
278
-
279
- end
280
-
281
- def r2rdf_metadata
282
- str <<-EOF.unindent
283
- <#{options[:base_url]}/ns/R2RDF> a foaf:Agent;
284
- foaf:name "R2RDF Semantic Web Toolkit";
285
- org:memberOf <http://sciruby.com/>
286
- EOF
287
- end
288
-
289
- def org_metadata
290
- str <<-EOF.unindent
291
- <http://sciruby.com/> a org:Organization, prov:Organization;
292
- skos:prefLabel "SciRuby";
293
- rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".
294
- EOF
295
- end
296
-
297
- def metadata_help(topic=nil)
298
- if topic
299
- puts "This should display help information for #{topic}, but there's none here yet :("
300
- else
301
- puts <<-EOF.unindent
302
- Available metadata fields:
303
- (Field) (Ontology) (Description)
304
-
305
- publishers dct/foaf/org The Organization/s responsible for publishing the dataset
306
- subject dct The subject of this dataset. Use resources when possible
307
- var dct The name of the datset resource (used internally)
308
- creator dct The person or process responsible for creating the dataset
309
- description dct/rdfs A descriptions of the dataset
310
- issued dct The date of issuance for the dataset
311
-
312
- EOF
313
- end
314
- end
1
+ module PubliSci
2
+ class Metadata
3
+ extend PubliSci::Registry
315
4
  end
316
5
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Activity
4
4
  include Prov::Element
5
5
  class Associations < Array
@@ -26,6 +26,8 @@ module Prov
26
26
  def generated(entity=nil)
27
27
  if entity.is_a? Entity
28
28
  entity.generated_by self
29
+ elsif Prov.entities[entity]
30
+ Prov.entities[entity].generated_by self
29
31
  end
30
32
  basic_list(:generated,:entities,Generations,entity)
31
33
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Agent
4
4
  include Prov::Element
5
5
  attr_accessor :organization
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Association
4
4
  include Prov::Element
5
5
 
@@ -95,7 +95,7 @@ module Prov
95
95
  str << "\tprov:agent <#{agent}> ;\n"
96
96
  str << "\tprov:hadPlan <#{plan}> ;\n" if plan
97
97
  str << "\tprov:hadRole <#{role}> ;\n" if role
98
- str[-2] = ".\n"
98
+ str << "\trdfs:label \"#{__label}\".\n\n"
99
99
  str
100
100
  end
101
101
 
@@ -0,0 +1,34 @@
1
+ module PubliSci
2
+ class Prov
3
+ class Configuration
4
+ def self.defaults
5
+ {
6
+ output: :generate_n3,
7
+ abbreviate: false,
8
+ repository: :in_memory,
9
+ repository_url: 'http://localhost:8080/'
10
+ }
11
+ end
12
+
13
+ defaults.keys.each{|k|
14
+ default = defaults[k]
15
+ define_method(k) do |input=nil|
16
+ var = instance_variable_get :"@#{k}"
17
+ if var
18
+ var
19
+ else
20
+ instance_variable_set :"@#{k}", default
21
+ end
22
+
23
+ if input
24
+ instance_variable_set :"@#{k}", input
25
+ end
26
+
27
+ instance_variable_get :"@#{k}"
28
+ end
29
+
30
+ attr_writer k
31
+ }
32
+ end
33
+ end
34
+ end
@@ -1,9 +1,14 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Derivation
4
4
 
5
5
  include PubliSci::CustomPredicate
6
6
 
7
+ def __label
8
+ # raise "MissingInternalLabel: no __label for #{self.inspect}" unless @__label
9
+ @__label ||= Time.now.nsec.to_s(32)
10
+ end
11
+
7
12
  def subject(sub=nil)
8
13
  if sub
9
14
  @subject = sub
@@ -40,10 +45,10 @@ module PubliSci
40
45
  str = "<#{subject}> a prov:Derivation ;\n"
41
46
  str << "\tprov:entity <#{entity}> ;\n" if entity
42
47
  str << "\tprov:hadActivity <#{had_activity}> ;\n" if had_activity
48
+ str << "\trdfs:label \"#{__label}\".\n\n"
43
49
 
44
50
  add_custom(str)
45
51
 
46
- str[-2] = ".\n"
47
52
  str
48
53
  end
49
54
 
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  module Element
4
4
  include PubliSci::Vocabulary
5
5
  include PubliSci::CustomPredicate
@@ -103,7 +103,7 @@ module PubliSci
103
103
  instance_variable_set("@#{var}",collection_class.new)
104
104
  end
105
105
  instance_variable_get("@#{var}") << inst
106
- Prov.register(type,inst)
106
+ Prov.register(inst.__label,inst)
107
107
  else
108
108
  if name
109
109
  unless instance_variable_get("@#{var}")
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Prov
2
+ class Prov
3
3
  class Entity
4
4
  include Prov::Element
5
5
 
@@ -28,31 +28,10 @@ module PubliSci
28
28
 
29
29
  def attributed_to(agent=nil)
30
30
  basic_keyword(:attributed_to,:agents,agent)
31
- # if agent
32
- # @attributed_to = agent
33
- # elsif @attributed_to.is_a? Symbol
34
- # raise "UnknownAgent: #{@attributed_to}" unless Prov.agents[@attributed_to]
35
- # @attributed_to = Prov.agents[@attributed_to]
36
- # else
37
- # @attributed_to
38
- # end
39
31
  end
40
32
 
41
33
  def derived_from(entity=nil,&block)
42
34
  block_list(:derived_from,:derivations,Derivation,Derivations,entity,&block)
43
- # if block_given?
44
- # deriv = Derivation.new
45
- # deriv.instance_eval(&block)
46
- # (@derived_from ||= Derivations.new) << deriv
47
- # Prov.register(nil,deriv)
48
- # else
49
- # if entity
50
-
51
- # (@derived_from ||= Derivations.new) << entity
52
- # else
53
- # @derived_from
54
- # end
55
- # end
56
35
  end
57
36
 
58
37
  def to_n3