bio-publisci 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +5 -5
  4. data/bin/bio-publisci +34 -11
  5. data/examples/bio-band_integration.rb +9 -0
  6. data/examples/no_magic.prov +40 -0
  7. data/examples/primer.prov +28 -0
  8. data/examples/prov_dsl.prov +51 -0
  9. data/features/create_generator.feature +5 -9
  10. data/features/integration_steps.rb +8 -8
  11. data/features/metadata.feature +15 -2
  12. data/features/metadata_steps.rb +21 -0
  13. data/features/orm_steps.rb +5 -5
  14. data/features/prov_dsl.feature +14 -0
  15. data/features/prov_dsl_steps.rb +11 -0
  16. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
  17. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
  18. data/lib/bio-publisci/dataset/data_cube.rb +30 -26
  19. data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
  20. data/lib/bio-publisci/metadata/metadata.rb +180 -42
  21. data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
  22. data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
  23. data/lib/bio-publisci/metadata/prov/association.rb +73 -0
  24. data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
  25. data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
  26. data/lib/bio-publisci/metadata/prov/element.rb +52 -0
  27. data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
  28. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  29. data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
  30. data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
  31. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  32. data/lib/bio-publisci/output.rb +27 -0
  33. data/lib/bio-publisci/parser.rb +17 -8
  34. data/lib/bio-publisci/readers/csv.rb +9 -7
  35. data/lib/bio-publisci/readers/dataframe.rb +9 -8
  36. data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
  37. data/lib/bio-publisci/readers/r_matrix.rb +37 -13
  38. data/lib/bio-publisci/spira.rb +82 -0
  39. data/lib/bio-publisci/writers/dataframe.rb +65 -65
  40. data/lib/bio-publisci.rb +9 -4
  41. data/spec/ORM/data_cube_orm_spec.rb +3 -3
  42. data/spec/dataset_for_spec.rb +29 -0
  43. data/spec/generators/r_cross_spec.rb +51 -0
  44. data/spec/generators/r_matrix_spec.rb +14 -5
  45. metadata +42 -8
  46. data/lib/bio-publisci/readers/cross.rb +0 -72
@@ -0,0 +1,159 @@
1
module PubliSci
  module Prov
    # Mixin that provides the provenance DSL: declaration methods (+agent+,
    # +entity+, +plan+, +activity+, ...) that build Prov elements and store
    # them in the shared +Prov.registry+, plus +generate_n3+ to serialise
    # everything that has been registered.
    module DSL
      include PubliSci::Vocabulary

      # Evaluation context for DSL scripts (see +Prov.run+). Creating one
      # empties the shared registry.
      #
      # NOTE: this class is deliberately declared *before* +self.included+
      # below, so including the DSL here does not fire that hook at load time.
      class Singleton
        include Prov::DSL

        def initialize
          Prov.registry.clear
        end
      end

      # Hook: mixing the DSL into a module or class empties the registry.
      def self.included(mod)
        Prov.registry.clear
      end

      # Declares and registers an agent.
      #
      # @param name  label the agent is registered under
      # @param args  [Hash] settings used when no block is given; +:subject+
      #              sets the subject, every other key is sent to the agent
      # @param block optional block evaluated in the context of the new agent
      # @return whatever +Prov.register+ returns
      # @raise [RuntimeError] if a key in +args+ is not a method of the agent
      def agent(name, args = {}, &block)
        register_element(Prov::Agent, "agent", name, args, &block)
      end

      # Declares an agent with its +:type+ setting forced to +:organization+.
      # The caller's hash is left untouched (a copy is merged).
      #
      # NOTE(review): when a block is given, +args+ (and therefore +:type+)
      # are not applied, exactly as for +agent+ — confirm this is intended.
      def organization(name, args = {}, &block)
        agent(name, args.merge(type: :organization), &block)
      end

      # Declares and registers an entity. Same conventions as +agent+.
      def entity(name, args = {}, &block)
        register_element(Prov::Entity, "entity", name, args, &block)
      end
      alias_method :data, :entity

      # Declares and registers a plan. Same conventions as +agent+.
      def plan(name, args = {}, &block)
        register_element(Prov::Plan, "plan", name, args, &block)
      end

      # Declares and registers an activity. Same conventions as +agent+;
      # the hash form is handled like the other element kinds.
      def activity(name, args = {}, &block)
        register_element(Prov::Activity, "activity", name, args, &block)
      end

      # Serialises every registered element by concatenating their +to_n3+
      # output (entities, agents, activities, plans, associations, then
      # derivations).
      #
      # @param abbreviate [Boolean] when true, known subject URIs are
      #        rewritten to prefixed names (see +abbreviate_known+)
      # @return [String]
      def generate_n3(abbreviate = false)
        entities     = Prov.entities.values.map(&:to_n3).join
        agents       = Prov.agents.values.map(&:to_n3).join
        activities   = Prov.activities.values.map(&:to_n3).join
        plans        = Prov.plans.values.map(&:to_n3).join
        associations = Prov.associations.map(&:to_n3).join
        # Derivations are only present once one has been registered.
        derivations  = (Prov.registry[:derivation] || []).map(&:to_n3).join

        str = "#{entities}#{agents}#{activities}#{plans}#{associations}#{derivations}"

        abbreviate ? abbreviate_known(str) : str
      end

      # @return the shared registry of everything declared so far
      def return_objects
        Prov.registry
      end

      private

      # Builds an element of +klass+, configures it from the block or from
      # +args+, and registers it under +name+.
      #
      # With a block, the block is evaluated first and the label is assigned
      # afterwards. Without one, the label is assigned first so a subject can
      # be generated from it when +args[:subject]+ is nil.
      def register_element(klass, kind, name, args, &block)
        element = klass.new

        if block
          element.instance_eval(&block)
          element.__label = name
        else
          element.__label = name
          element.subject args[:subject]

          (args.keys - [:subject]).each do |key|
            raise "Unkown #{kind} setting #{key}" unless try_auto_set(element, key, args[key])
          end
        end

        Prov.register(name, element)
      end

      # Calls +method+ on +object+ with +args+ if the object publicly
      # responds to it.
      #
      # @return [Boolean] true when the call was made, false otherwise
      def try_auto_set(object, method, args)
        return false unless object.respond_to?(method)

        object.public_send(method, args)
        true
      end

      # Returns a copy of +turtle+ in which subject URIs under
      # +Prov.base_url+ (and the hard-coded gsocsemantic URL) are replaced by
      # prefixed names such as +entity:foo+ or +me:foo+.
      def abbreviate_known(turtle)
        ttl  = turtle.dup
        # Escape the base URL so characters like "." match literally.
        base = Regexp.escape(Prov.base_url)

        %w{activity assoc agent plan entity derivation}.each do |element|
          ttl.gsub!(%r{<#{base}/#{element}/([\w|\d]+)>}, "#{element}:" + '\1')
        end

        ttl.gsub!(%r{<http://gsocsemantic.wordpress.com/([\w|\d]+)>}, 'me:\1')
        ttl
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module PubliSci
  module Prov
    # Behaviour shared by provenance elements: a subject URI (explicit or
    # generated from the element's kind and label) and an internal label.
    module Element
      include PubliSci::Vocabulary
      include PubliSci::CustomPredicate

      # Plain writers for the subject and the internal label.
      attr_writer :subject, :__label

      # Reads or sets the subject.
      #
      # With no argument (or a falsy one) returns the stored subject,
      # generating and caching it on first use. With an argument, stores it.
      #
      # @raise [RuntimeError] when given a Symbol (not implemented)
      def subject(s = nil)
        unless s
          @subject ||= generate_subject
          return @subject
        end

        raise "subject generation from symbol not yet implemented!" if s.is_a? Symbol

        @subject = s
      end

      # @return the internal label
      # @raise [RuntimeError] when no label has been assigned
      def __label
        return @__label if @__label

        raise "MissingInternalLabel: no __label for #{self.inspect}"
      end

      private

      # Builds "<base_url>/<category>/<label>", where the category depends on
      # which element class the receiver is an instance of.
      def generate_subject
        category =
          if is_a?(Agent)
            "agent"
          elsif is_a?(Entity)
            "entity"
          elsif is_a?(Activity)
            "activity"
          elsif is_a?(Plan)
            "plan"
          else
            raise "MissingSubject: No automatic subject generation for #{self}"
          end

        "#{Prov.base_url}/#{category}/#{__label}"
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
module PubliSci
  module Prov
    # A provenance entity: records what generated it, whom it is attributed
    # to and what it was derived from, and serialises itself with +to_n3+.
    class Entity
      # Array whose +[]+ resolves Symbol items through +Prov.entities+.
      class Derivations < Array
        def [](index)
          item = fetch(index)
          item.is_a?(Symbol) ? Prov.entities[item] : item
        end
      end

      include Prov::Element

      # With an argument, appends it to the list of sources; without one,
      # returns the list (nil if nothing was added yet).
      def source(s = nil)
        return @sources unless s

        @sources ||= []
        @sources << s
      end

      # Sets the generating activity, or returns it. A stored Symbol is
      # replaced by its lookup in +Prov.activities+ on read.
      def generated_by(activity = nil)
        return @generated_by = activity if activity

        @generated_by = Prov.activities[@generated_by] if @generated_by.is_a? Symbol
        @generated_by
      end

      # Sets the attributed agent, or returns it. A stored Symbol is
      # replaced by its lookup in +Prov.agents+ on read.
      def attributed_to(agent = nil)
        return @attributed_to = agent if agent

        @attributed_to = Prov.agents[@attributed_to] if @attributed_to.is_a? Symbol
        @attributed_to
      end

      # Three forms:
      # * with a block: builds a Derivation from the block, records it and
      #   registers it (unnamed) with Prov;
      # * with an entity: records the entity;
      # * with neither: returns the recorded derivations (nil if none).
      def derived_from(entity = nil, &block)
        if block
          deriv = Derivation.new
          deriv.instance_eval(&block)
          (@derived_from ||= Derivations.new) << deriv
          Prov.register(nil, deriv)
        elsif entity
          (@derived_from ||= Derivations.new) << entity
        else
          @derived_from
        end
      end

      # Serialises the entity as a block of statements ending with its label.
      #
      # @return [String]
      def to_n3
        str = "<#{subject}> a prov:Entity ;\n"

        activity = generated_by
        str << "\tprov:wasGeneratedBy <#{activity}> ;\n" if activity

        agent = attributed_to
        str << "\tprov:wasAttributedTo <#{agent}> ;\n" if agent

        (derived_from || []).each do |der|
          # Symbols are swapped for the registered entity when one exists.
          resolved = der.is_a?(Symbol) ? (Prov.entities[der] || der) : der

          if resolved.is_a? Derivation
            str << "\tprov:wasDerivedFrom <#{resolved.entity}> ;\n"
            str << "\tprov:qualifiedDerivation <#{resolved.subject}> ;\n"
          else
            str << "\tprov:wasDerivedFrom <#{resolved}> ;\n"
          end
        end

        add_custom(str)

        str << %Q(\trdfs:label "#{__label}" .\n\n)
      end

      # The entity's string form is its subject.
      def to_s
        subject
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module PubliSci
  module Prov
    # A provenance plan: an element carrying an optional list of steps.
    class Plan
      include Prov::Element

      # Reads or sets the plan's steps.
      #
      # @param steps [String, Array, nil] a single step, a list of steps, or
      #        the path of an existing file whose lines become the steps;
      #        nil (the default) just reads the current value
      # @return [Array, nil] the stored steps as a flat array, or nil if none
      #         were ever set
      def steps(steps = nil)
        return @steps unless steps

        # Only a String can be a path; checking anything else would raise.
        if steps.is_a?(String) && File.exist?(steps)
          steps = File.read(steps).split("\n")
        end

        # Always store one flat list, whether given a scalar or an array.
        @steps = [steps].flatten
      end

      # Serialises the plan; steps, when present, are emitted as a
      # parenthesised list of quoted strings under rdfs:comment.
      #
      # @return [String]
      def to_n3
        str = "<#{subject}> a prov:Plan, prov:Entity ;\n"
        if steps
          str << "\trdfs:comment (\"#{steps.join('" "')}\") ;\n"
        end
        add_custom(str)

        str << "\trdfs:label \"#{__label}\" .\n\n"
      end

      # The plan's string form is its subject.
      def to_s
        subject
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
module PubliSci
  # Module-level registry and entry points for the provenance DSL.
  module Prov
    # Stores +object+ in the registry, in a bucket chosen by its class.
    #
    # Named objects go into a Hash keyed by the symbolised name; unnamed
    # objects are appended to an Array. Classes without a dedicated bucket
    # use their downcased, unqualified class name as the bucket key.
    #
    # @param name   [#to_sym, nil]
    # @param object the element to store
    # @return the stored object (named) or the bucket array (unnamed)
    def self.register(name, object)
      name = name.to_sym if name

      sub = case object
            when Agent       then :agents
            when Entity      then :entities
            when Activity    then :activities
            when Association then :associations
            when Plan        then :plans
            else object.class.to_s.split('::').last.downcase.to_sym
            end

      if name
        (registry[sub] ||= {})[name] = object
      else
        (registry[sub] ||= []) << object
      end
    end

    # @return [Hash] the shared registry, created on first use
    def self.registry
      @registry ||= {}
    end

    # Evaluates DSL code in a fresh +DSL::Singleton+.
    #
    # @param string [String] path of an existing file to evaluate, or the
    #        DSL source itself
    # @return the value of the evaluated code
    #
    # NOTE: this is +instance_eval+ on the given text — only pass trusted
    # input.
    def self.run(string)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      if File.exist? string
        DSL::Singleton.new.instance_eval(IO.read(string), string)
      else
        DSL::Singleton.new.instance_eval(string)
      end
    end

    # @return [Hash] registered agents by name
    def self.agents
      registry[:agents] ||= {}
    end

    # @return [Hash] registered entities by name
    def self.entities
      registry[:entities] ||= {}
    end

    # @return [Hash] registered activities by name
    def self.activities
      registry[:activities] ||= {}
    end

    # @return [Array] registered associations
    def self.associations
      registry[:associations] ||= []
    end

    # @return [Hash] registered plans by name
    def self.plans
      registry[:plans] ||= {}
    end

    # @return [String] base URL used when generating subjects
    def self.base_url
      @base_url ||= "http://rqtl.org/ns"
    end

    def self.base_url=(url)
      @base_url = url
    end

    # @return [String] prefix declarations for the vocabularies used in the
    #         generated output
    def self.prefixes
      <<-EOF
      @prefix prov: <http://www.w3.org/ns/prov#> .
      @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
      @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
      @prefix foaf: <http://xmlns.com/foaf/0.1/> .
      EOF
    end
  end
end
@@ -0,0 +1,26 @@
1
module PubliSci
  # Mixin that lets an object collect arbitrary predicate/object pairs and
  # append them to a serialisation string.
  module CustomPredicate
    # Records +object+ under +predicate+.
    #
    # The predicate is converted to an RDF resource when that resource is
    # valid, otherwise kept as given. The object is stored as a resource when
    # valid, otherwise as a literal.
    #
    # @return [Array] the values recorded so far for this predicate
    def has(predicate, object)
      as_resource = RDF::Resource(predicate)
      predicate = as_resource if as_resource.valid?

      value = RDF::Resource(object)
      value = RDF::Literal(object) unless value.valid?

      @custom ||= {}
      @custom[predicate] ||= []
      @custom[predicate] << value
    end
    alias_method :set, :has

    # @return [Hash, nil] recorded predicates mapped to their value lists,
    #         or nil when nothing has been recorded
    attr_reader :custom

    # Appends one "\t<predicate> <value> ;" line to +str+ for every recorded
    # value. Does nothing when nothing has been recorded.
    def add_custom(str)
      return unless custom

      custom.map do |pred, values|
        # Predicates that cannot render themselves are interpolated as-is.
        pred_text = pred.respond_to?(:to_base) ? pred.to_base : pred
        values.map do |val|
          str << "\t#{pred_text} #{val.to_base} ;\n"
        end
      end
    end
  end
end
+ end
@@ -0,0 +1,8 @@
1
module PubliSci
  # Mixin offering a helper to build an RDF vocabulary from a URL.
  module Vocabulary
    # @param url the vocabulary's base URL
    # @return [RDF::Vocabulary] a vocabulary rooted at +url+
    # @raise [RuntimeError] when +url+ does not form a valid RDF resource
    def vocabulary(url)
      if RDF::Resource(url).valid?
        RDF::Vocabulary.new(url)
      else
        raise "InvalidVocabulary: #{url} is not a valid URI"
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module R2RDF
  module Reader
    # Mixin that delivers generated text to one or more destinations.
    module Output
      # Sends +string+ to every destination listed in +options[:type]+.
      #
      # @param string  [String] the text to deliver
      # @param options [Hash]
      #   :type      - one or several of :print, :file, :string
      #                (defaults to :string)
      #   :file_name - target file name, required for :file
      #   :file_base - optional prefix prepended verbatim to :file_name
      # @param append  [Boolean] append to the file instead of overwriting it
      # @return [String, nil] +string+ when :string is requested, else nil
      # @raise [RuntimeError] when :file is requested without :file_name
      def output(string, options = {}, append = false)
        # Read the default without writing it back into the caller's hash.
        types = Array(options[:type] || :string)

        puts string if types.include? :print

        if types.include? :file
          name = options[:file_name]
          raise "no file specified output" unless name

          mode = append ? 'a' : 'w'
          # File.open (not Kernel#open) so the path is never treated as a
          # command; the block form closes the file for us.
          File.open("#{options[:file_base]}#{name}", mode) { |f| f.write string }
        end

        string if types.include? :string
      end
    end
  end
end
@@ -1,12 +1,21 @@
1
1
  module R2RDF
2
2
  module Parser
3
3
 
4
+ def is_uri?(string)
5
+ RDF::Resource(string).valid?
6
+ end
7
+
4
8
  def sanitize(array)
5
9
  #remove spaces and other special characters
10
+ array = Array(array)
6
11
  processed = []
7
12
  array.map{|entry|
8
13
  if entry.is_a? String
9
- processed << entry.gsub(/[\s\.]/,'_')
14
+ if is_uri? entry
15
+ processed << entry.gsub(/[\s]/,'_')
16
+ else
17
+ processed << entry.gsub(/[\s\.]/,'_')
18
+ end
10
19
  else
11
20
  processed << entry
12
21
  end
@@ -16,7 +25,7 @@ module R2RDF
16
25
 
17
26
  def sanitize_hash(h)
18
27
  mappings = {}
19
- h.keys.map{|k|
28
+ h.keys.map{|k|
20
29
  if(k.is_a? String)
21
30
  mappings[k] = k.gsub(' ','_')
22
31
  end
@@ -74,9 +83,9 @@ module R2RDF
74
83
 
75
84
  if shorten_uris
76
85
  newh= {}
77
- h.map{|k,v|
86
+ h.map{|k,v|
78
87
  newh[strip_uri(k)] ||= {}
79
- v.map{|kk,vv|
88
+ v.map{|kk,vv|
80
89
  newh[strip_uri(k)][strip_uri(kk)] = strip_uri(vv)
81
90
  }
82
91
  }
@@ -88,12 +97,12 @@ module R2RDF
88
97
 
89
98
  def to_resource(obj, options)
90
99
  if obj.is_a? String
91
- obj = "<#{obj}>" if obj =~ /^http:\/\//
92
-
100
+ obj = "<#{obj}>" if is_uri? obj
101
+
93
102
  #TODO decide the right way to handle missing values, since RDF has no null
94
103
  #probably throw an error here since a missing resource is a bigger problem
95
104
  obj = "NA" if obj.empty?
96
-
105
+
97
106
  #TODO remove special characters (faster) as well (eg '?')
98
107
  obj.gsub(' ','_').gsub('?','')
99
108
  elsif obj == nil && options[:encode_nulls]
@@ -108,7 +117,7 @@ module R2RDF
108
117
 
109
118
  def to_literal(obj, options)
110
119
  if obj.is_a? String
111
- # Depressing that there's no more elegant way to check if a string is
120
+ # Depressing that there's no more elegant way to check if a string is
112
121
  # a number...
113
122
  if val = Integer(obj) rescue nil
114
123
  val
@@ -5,15 +5,17 @@ module R2RDF
5
5
  include R2RDF::Interactive
6
6
 
7
7
  def automatic(file=nil,dataset_name=nil,options={},interactive=true)
8
- #to do
8
+ #to do
9
+ # puts "f #{file} \n ds #{dataset_name} opts #{options}"
10
+
9
11
  unless file || !interactive
10
12
  puts "Input file?"
11
13
  file = gets.chomp
12
14
  end
13
-
15
+
14
16
  raise "CSV reader needs an input file" unless file && file.size > 0
15
17
 
16
-
18
+
17
19
  unless dataset_name
18
20
  if interactive
19
21
  dataset_name = interact("Dataset name?","#{File.basename(file).split('.').first}"){|sel| File.basename(file).split('.').first }
@@ -21,7 +23,7 @@ module R2RDF
21
23
  dataset_name = File.basename(file).split('.').first
22
24
  end
23
25
  end
24
-
26
+
25
27
 
26
28
  categories = ::CSV.read(file)[0]
27
29
 
@@ -31,9 +33,9 @@ module R2RDF
31
33
  end
32
34
 
33
35
  unless options[:measures] || !interactive
34
- meas = categories - ((options[:dimensions] || []) | [categories[0]])
36
+ meas = categories - (options[:dimensions] || [categories[0]])
35
37
  selection = interact("Measures?",meas,meas){|s| nil}
36
- options[:measures] = Array(selection) unless options[:measures] == nil
38
+ options[:measures] = Array(selection) unless selection == nil
37
39
  end
38
40
 
39
41
  generate_n3(file,dataset_name,options)
@@ -76,7 +78,7 @@ module R2RDF
76
78
  }
77
79
  tmp = @data.dup
78
80
  tmp.shift
79
-
81
+
80
82
  tmp.map{|row|
81
83
  row.each_with_index{|entry,i|
82
84
  obs[@data[0][i]] << entry
@@ -1,17 +1,18 @@
1
1
  module R2RDF
2
2
  module Reader
3
3
  class Dataframe
4
- include R2RDF::Dataset::DataCube
5
-
4
+ include R2RDF::Dataset::DataCube
5
+ include R2RDF::Reader::Output
6
+
6
7
  # def initialize(var)
7
8
  # @var = var
8
9
  # end
9
-
10
+
10
11
  def generate_n3(rexp, var, options={})
11
12
  @rexp = rexp
12
- @options = options
13
-
14
- generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
13
+ options[:type] ||= :string
14
+ @options = options
15
+ output(generate(measures, dimensions, codes, observation_data, observation_labels, var, options), options)
15
16
  end
16
17
 
17
18
  def dimensions
@@ -21,7 +22,7 @@ module R2RDF
21
22
  [@options[:row_label]]
22
23
  else
23
24
  ["refRow"]
24
- end
25
+ end
25
26
  end
26
27
 
27
28
  def codes
@@ -31,7 +32,7 @@ module R2RDF
31
32
  [@options[:row_label]]
32
33
  else
33
34
  ["refRow"]
34
- end
35
+ end
35
36
  end
36
37
 
37
38
  def measures