bio-publisci 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Rakefile +5 -5
  4. data/bin/bio-publisci +34 -11
  5. data/examples/bio-band_integration.rb +9 -0
  6. data/examples/no_magic.prov +40 -0
  7. data/examples/primer.prov +28 -0
  8. data/examples/prov_dsl.prov +51 -0
  9. data/features/create_generator.feature +5 -9
  10. data/features/integration_steps.rb +8 -8
  11. data/features/metadata.feature +15 -2
  12. data/features/metadata_steps.rb +21 -0
  13. data/features/orm_steps.rb +5 -5
  14. data/features/prov_dsl.feature +14 -0
  15. data/features/prov_dsl_steps.rb +11 -0
  16. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
  17. data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
  18. data/lib/bio-publisci/dataset/data_cube.rb +30 -26
  19. data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
  20. data/lib/bio-publisci/metadata/metadata.rb +180 -42
  21. data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
  22. data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
  23. data/lib/bio-publisci/metadata/prov/association.rb +73 -0
  24. data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
  25. data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
  26. data/lib/bio-publisci/metadata/prov/element.rb +52 -0
  27. data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
  28. data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
  29. data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
  30. data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
  31. data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
  32. data/lib/bio-publisci/output.rb +27 -0
  33. data/lib/bio-publisci/parser.rb +17 -8
  34. data/lib/bio-publisci/readers/csv.rb +9 -7
  35. data/lib/bio-publisci/readers/dataframe.rb +9 -8
  36. data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
  37. data/lib/bio-publisci/readers/r_matrix.rb +37 -13
  38. data/lib/bio-publisci/spira.rb +82 -0
  39. data/lib/bio-publisci/writers/dataframe.rb +65 -65
  40. data/lib/bio-publisci.rb +9 -4
  41. data/spec/ORM/data_cube_orm_spec.rb +3 -3
  42. data/spec/dataset_for_spec.rb +29 -0
  43. data/spec/generators/r_cross_spec.rb +51 -0
  44. data/spec/generators/r_matrix_spec.rb +14 -5
  45. metadata +42 -8
  46. data/lib/bio-publisci/readers/cross.rb +0 -72
@@ -0,0 +1,159 @@
1
module PubliSci
  module Prov
    # DSL for declaring PROV agents, entities, plans and activities and for
    # serialising the contents of the Prov registry as Turtle/N3.
    #
    # Every declaration method takes a name plus either a block (evaluated
    # with instance_eval on the freshly created element) or a hash of
    # settings. Each hash key is dispatched to the element's method of the
    # same name; :subject is always applied first.
    module DSL

      include PubliSci::Vocabulary

      # Bare evaluation context that mixes in the DSL. It is included here,
      # before the self.included hook below exists, so the hook does not fire
      # for Singleton; the constructor clears the registry instead.
      class Singleton
        include Prov::DSL

        def initialize
          Prov.registry.clear
        end
      end

      # Start from an empty registry whenever the DSL is mixed in somewhere.
      def self.included(mod)
        Prov.registry.clear
      end

      # Declare and register a Prov::Agent under +name+.
      # Returns whatever Prov.register returns.
      def agent(name, args = {}, &block)
        define_element(Prov::Agent, "agent", name, args, &block)
      end

      # Declare an agent whose :type setting is :organization.
      # The caller's hash is copied rather than mutated.
      # NOTE(review): when a block is given the settings hash (including the
      # injected :type) is ignored, exactly as for every other declaration.
      def organization(name, args = {}, &block)
        agent(name, args.merge(type: :organization), &block)
      end

      # Declare and register a Prov::Entity under +name+ (also available as +data+).
      def entity(name, args = {}, &block)
        define_element(Prov::Entity, "entity", name, args, &block)
      end
      alias_method :data, :entity

      # Declare and register a Prov::Plan under +name+.
      # (The block form previously registered an undefined local and raised NameError.)
      def plan(name, args = {}, &block)
        define_element(Prov::Plan, "plan", name, args, &block)
      end

      # Declare and register a Prov::Activity under +name+.
      # The hash form used to dereference an unassigned local and could never
      # succeed; it now behaves like the other declarations.
      def activity(name, args = {}, &block)
        define_element(Prov::Activity, "activity", name, args, &block)
      end

      # Concatenate the N3 of every registered element.
      #
      # abbreviate - when truthy, known subject URIs are rewritten as
      #              prefixed names (see abbreviate_known).
      #
      # Returns a String.
      def generate_n3(abbreviate = false)
        sections = [
          Prov.entities.values,
          Prov.agents.values,
          Prov.activities.values,
          Prov.plans.values,
          Prov.associations,
          # Derivations are registered without a name, so they may be absent.
          Prov.registry[:derivation] || []
        ]
        str = sections.map { |elements| elements.map(&:to_n3).join }.join

        abbreviate ? abbreviate_known(str) : str
      end

      # Expose the raw registry hash built up by the declarations.
      def return_objects
        Prov.registry
      end

      private

      # Shared implementation behind agent/entity/plan/activity.
      #
      # klass - element class to instantiate
      # kind  - word used in the "Unkown <kind> setting" error message
      # name  - registry key, also stored as the element's internal label
      # args  - settings hash; only consulted when no block is given
      #
      # Raises RuntimeError when a hash key matches no method on the element.
      def define_element(klass, kind, name, args, &block)
        element = klass.new
        # Label first so a block may already ask for the generated subject.
        element.__label = name

        if block
          element.instance_eval(&block)
        else
          element.subject args[:subject]
          (args.keys - [:subject]).each do |key|
            raise "Unkown #{kind} setting #{key}" unless try_auto_set(element, key, args[key])
          end
        end

        Prov.register(name, element)
      end

      # Call object.method(args) if the object publicly responds to it.
      # Returns true when the call was made, false otherwise.
      def try_auto_set(object, method, args)
        return false unless object.respond_to?(method)

        object.public_send(method, args)
        true
      end

      # Replace <base_url/kind/id> and the hard-coded blog namespace with
      # prefixed names. Works on a copy; the argument is left untouched.
      def abbreviate_known(turtle)
        ttl = turtle.dup
        # Escape the base URL so its dots only match literal dots.
        base = Regexp.escape(Prov.base_url)
        %w{activity assoc agent plan entity derivation}.each do |element|
          ttl.gsub!(%r{<#{base}/#{element}/(\w+)>}, "#{element}:\\1")
        end

        ttl.gsub!(%r{<http://gsocsemantic\.wordpress\.com/(\w+)>}, 'me:\1')
        ttl
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module PubliSci
  module Prov
    # Behaviour shared by PROV elements: an explicit or generated subject URI
    # and an internal label used to build that URI.
    module Element
      include PubliSci::Vocabulary
      include PubliSci::CustomPredicate

      # Plain setters for the subject and the internal label.
      attr_writer :subject, :__label

      # Reader/writer in one: with an argument the subject is stored, without
      # one it is returned, being generated and memoised on first use.
      # Symbols are rejected because generation from them is not implemented.
      def subject(s = nil)
        return @subject ||= generate_subject unless s
        raise "subject generation from symbol not yet implemented!" if s.is_a? Symbol

        @subject = s
      end

      # Internal label; raises when none has been assigned yet.
      def __label
        @__label || raise("MissingInternalLabel: no __label for #{self.inspect}")
      end

      private

      # Build "<base_url>/<category>/<label>" from the element's class.
      def generate_subject
        category =
          if is_a?(Agent)
            "agent"
          elsif is_a?(Entity)
            "entity"
          elsif is_a?(Activity)
            "activity"
          elsif is_a?(Plan)
            "plan"
          else
            raise "MissingSubject: No automatic subject generation for #{self}"
          end

        [Prov.base_url, category, __label].join("/")
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
module PubliSci
  module Prov
    # A prov:Entity with optional generating activity, attributed agent,
    # sources and derivations, serialisable to N3.
    class Entity
      # Array whose #[] resolves Symbol items through Prov.entities.
      class Derivations < Array
        def [](index)
          item = fetch(index)
          item.is_a?(Symbol) ? Prov.entities[item] : item
        end
      end

      include Prov::Element

      # With an argument, append it to the source list; otherwise return the list.
      def source(s = nil)
        return @sources unless s

        (@sources ||= []) << s
      end

      # Set the generating activity, or read it back. A stored Symbol is
      # swapped for the registered activity the first time it is read.
      def generated_by(activity = nil)
        return @generated_by = activity if activity

        @generated_by = Prov.activities[@generated_by] if @generated_by.is_a? Symbol
        @generated_by
      end

      # Set the responsible agent, or read it back. A stored Symbol is
      # swapped for the registered agent the first time it is read.
      def attributed_to(agent = nil)
        return @attributed_to = agent if agent

        @attributed_to = Prov.agents[@attributed_to] if @attributed_to.is_a? Symbol
        @attributed_to
      end

      # Three modes: a block builds and registers a qualified Derivation, an
      # argument records a plain derivation, and a bare call returns the list.
      def derived_from(entity = nil, &block)
        return @derived_from unless block_given? || entity

        unless block_given?
          return (@derived_from ||= Derivations.new) << entity
        end

        deriv = Derivation.new
        deriv.instance_eval(&block)
        (@derived_from ||= Derivations.new) << deriv
        Prov.register(nil, deriv)
      end

      # Serialise this entity as an N3 statement block ending in its rdfs:label.
      def to_n3
        str = "<#{subject}> a prov:Entity ;\n"

        activity = generated_by
        str << "\tprov:wasGeneratedBy <#{activity}> ;\n" if activity

        agent = attributed_to
        str << "\tprov:wasAttributedTo <#{agent}> ;\n" if agent

        (derived_from || []).each do |der|
          # Symbols naming a registered entity are resolved; others pass through.
          der = Prov.entities[der] if der.is_a?(Symbol) && Prov.entities[der]

          if der.is_a? Derivation
            str << "\tprov:wasDerivedFrom <#{der.entity}> ;\n"
            str << "\tprov:qualifiedDerivation <#{der.subject}> ;\n"
          else
            str << "\tprov:wasDerivedFrom <#{der}> ;\n"
          end
        end

        add_custom(str)

        str << "\trdfs:label \"#{__label}\" .\n\n"
      end

      # String form is the subject URI, which lets elements be interpolated into N3.
      def to_s
        subject
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module PubliSci
  module Prov
    # A prov:Plan, optionally carrying a list of textual steps.
    class Plan
      include Prov::Element

      # Set or read the plan's steps.
      #
      # steps - a single step String, an Array of steps, or the path of an
      #         existing file whose lines become the steps. Omit to read.
      #
      # Returns the stored steps as a flat Array, or nil when none were set.
      def steps(steps = nil)
        return @steps unless steps

        # Only a String can be a path; probing the filesystem with an Array
        # used to raise TypeError.
        steps = File.read(steps).split("\n") if steps.is_a?(String) && File.file?(steps)

        # Array() keeps the list flat. The previous Array[...] wrapping nested
        # it up to three levels deep; the joined N3 output is the same.
        @steps = Array(steps)
      end

      # Serialise the plan as N3; steps are emitted as an rdfs:comment list.
      def to_n3
        str = "<#{subject}> a prov:Plan, prov:Entity ;\n"
        if steps
          str << "\trdfs:comment (\"#{steps.join('" "')}\") ;\n"
        end
        add_custom(str)

        str << "\trdfs:label \"#{__label}\" .\n\n"
      end

      # String form is the subject URI.
      def to_s
        subject
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
module PubliSci
  # Registry and entry points for the provenance model.
  module Prov
    # Store +object+ in the registry bucket matching its class.
    #
    # name   - key to file the object under (converted to a Symbol), or nil
    #          to append it to an Array bucket instead.
    # object - the element; classes without a dedicated bucket are filed
    #          under their downcased, unqualified class name.
    #
    # Returns the object when named, otherwise the Array it was appended to.
    def self.register(name, object)
      name = name.to_sym if name
      sub = case object
            when Agent       then :agents
            when Entity      then :entities
            when Activity    then :activities
            when Association then :associations
            when Plan        then :plans
            else object.class.to_s.split('::').last.downcase.to_sym
            end

      if name
        (registry[sub] ||= {})[name] = object
      else
        (registry[sub] ||= []) << object
      end
    end

    # Hash of bucket name => registered elements, created on first use.
    def self.registry
      @registry ||= {}
    end

    # Evaluate DSL code against a fresh DSL::Singleton.
    #
    # string - path of an existing file (its contents are evaluated, with the
    #          path used for backtraces) or the DSL source itself.
    #
    # Returns the value of the last evaluated expression.
    def self.run(string)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      if File.exist? string
        DSL::Singleton.new.instance_eval(IO.read(string), string)
      else
        DSL::Singleton.new.instance_eval(string)
      end
    end

    # Named agents (Hash).
    def self.agents
      registry[:agents] ||= {}
    end

    # Named entities (Hash).
    def self.entities
      registry[:entities] ||= {}
    end

    # Named activities (Hash).
    def self.activities
      registry[:activities] ||= {}
    end

    # Associations (Array).
    def self.associations
      registry[:associations] ||= []
    end

    # Named plans (Hash).
    def self.plans
      registry[:plans] ||= {}
    end

    # Base URL used when generating subjects; defaults to the rqtl namespace.
    def self.base_url
      @base_url ||= "http://rqtl.org/ns"
    end

    # Override the base URL used for generated subjects.
    def self.base_url=(url)
      @base_url = url
    end

    # Turtle prefix declarations for the vocabularies used in the output.
    def self.prefixes
      <<-EOF
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
      EOF
    end
  end
end
@@ -0,0 +1,26 @@
1
module PubliSci
  # Mixin that lets an element carry arbitrary predicate/object pairs and
  # append them to its N3 serialisation.
  module CustomPredicate
    # Reader for the predicate => [objects] hash; nil until #has is first called.
    attr_reader :custom

    # Record one custom statement (also available as +set+).
    # The predicate is turned into an RDF resource when it is a valid one;
    # the object becomes a resource if valid and a literal otherwise.
    def has(predicate, object)
      as_resource = RDF::Resource(predicate)
      predicate = as_resource if as_resource.valid?

      obj = RDF::Resource(object)
      obj = RDF::Literal(object) unless obj.valid?

      @custom ||= {}
      (@custom[predicate] ||= []) << obj
    end
    alias_method :set, :has

    # Append one "\t<predicate> <object> ;" line per recorded statement to
    # +str+, mutating it in place. Does nothing when no statements exist.
    def add_custom(str)
      return unless custom

      custom.map do |predicate, objects|
        # Predicates that cannot render themselves are interpolated as-is.
        rendered = predicate.respond_to?(:to_base) ? predicate.to_base : predicate
        objects.map { |object| str << "\t#{rendered} #{object.to_base} ;\n" }
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
module PubliSci
  # Mixin offering a shortcut for building an RDF vocabulary from a URL.
  module Vocabulary
    # Return an RDF::Vocabulary for +url+.
    # Raises RuntimeError when +url+ is not a valid RDF resource.
    def vocabulary(url)
      return RDF::Vocabulary.new(url) if RDF::Resource(url).valid?

      raise "InvalidVocabulary: #{url} is not a valid URI"
    end
  end
end
@@ -0,0 +1,27 @@
1
module R2RDF
  module Reader
    # Mixin that delivers a generated string to stdout, a file and/or the caller.
    module Output
      # Deliver +string+ to every destination named in options[:type].
      #
      # string  - the text to deliver.
      # options - :type      one of, or an Array of, :print, :file, :string
      #                      (defaults to :string)
      #           :file_base prefix prepended to the file name (optional)
      #           :file_name target file; required for :file
      # append  - when true the file is opened for appending, not truncated.
      #
      # Returns +string+ when :string was requested, otherwise nil.
      # Raises RuntimeError when :file is requested without a :file_name.
      def output(string, options={}, append=false)
        # Read the default locally instead of writing it into the caller's hash.
        types = Array(options[:type] || :string)

        puts string if types.include? :print

        if types.include? :file
          name = options[:file_name]
          raise "no file specified output" unless name

          mode = append ? 'a' : 'w'
          # File.open, not Kernel#open: a path starting with "|" must never
          # spawn a process. The block used to write an undefined local.
          File.open("#{options[:file_base]}#{name}", mode) { |f| f.write string }
        end

        string if types.include? :string
      end
    end
  end
end
@@ -1,12 +1,21 @@
1
1
  module R2RDF
2
2
  module Parser
3
3
 
4
+ def is_uri?(string)
5
+ RDF::Resource(string).valid?
6
+ end
7
+
4
8
  def sanitize(array)
5
9
  #remove spaces and other special characters
10
+ array = Array(array)
6
11
  processed = []
7
12
  array.map{|entry|
8
13
  if entry.is_a? String
9
- processed << entry.gsub(/[\s\.]/,'_')
14
+ if is_uri? entry
15
+ processed << entry.gsub(/[\s]/,'_')
16
+ else
17
+ processed << entry.gsub(/[\s\.]/,'_')
18
+ end
10
19
  else
11
20
  processed << entry
12
21
  end
@@ -16,7 +25,7 @@ module R2RDF
16
25
 
17
26
  def sanitize_hash(h)
18
27
  mappings = {}
19
- h.keys.map{|k|
28
+ h.keys.map{|k|
20
29
  if(k.is_a? String)
21
30
  mappings[k] = k.gsub(' ','_')
22
31
  end
@@ -74,9 +83,9 @@ module R2RDF
74
83
 
75
84
  if shorten_uris
76
85
  newh= {}
77
- h.map{|k,v|
86
+ h.map{|k,v|
78
87
  newh[strip_uri(k)] ||= {}
79
- v.map{|kk,vv|
88
+ v.map{|kk,vv|
80
89
  newh[strip_uri(k)][strip_uri(kk)] = strip_uri(vv)
81
90
  }
82
91
  }
@@ -88,12 +97,12 @@ module R2RDF
88
97
 
89
98
  def to_resource(obj, options)
90
99
  if obj.is_a? String
91
- obj = "<#{obj}>" if obj =~ /^http:\/\//
92
-
100
+ obj = "<#{obj}>" if is_uri? obj
101
+
93
102
  #TODO decide the right way to handle missing values, since RDF has no null
94
103
  #probably throw an error here since a missing resource is a bigger problem
95
104
  obj = "NA" if obj.empty?
96
-
105
+
97
106
  #TODO remove special characters (faster) as well (eg '?')
98
107
  obj.gsub(' ','_').gsub('?','')
99
108
  elsif obj == nil && options[:encode_nulls]
@@ -108,7 +117,7 @@ module R2RDF
108
117
 
109
118
  def to_literal(obj, options)
110
119
  if obj.is_a? String
111
- # Depressing that there's no more elegant way to check if a string is
120
+ # Depressing that there's no more elegant way to check if a string is
112
121
  # a number...
113
122
  if val = Integer(obj) rescue nil
114
123
  val
@@ -5,15 +5,17 @@ module R2RDF
5
5
  include R2RDF::Interactive
6
6
 
7
7
  def automatic(file=nil,dataset_name=nil,options={},interactive=true)
8
- #to do
8
+ #to do
9
+ # puts "f #{file} \n ds #{dataset_name} opts #{options}"
10
+
9
11
  unless file || !interactive
10
12
  puts "Input file?"
11
13
  file = gets.chomp
12
14
  end
13
-
15
+
14
16
  raise "CSV reader needs an input file" unless file && file.size > 0
15
17
 
16
-
18
+
17
19
  unless dataset_name
18
20
  if interactive
19
21
  dataset_name = interact("Dataset name?","#{File.basename(file).split('.').first}"){|sel| File.basename(file).split('.').first }
@@ -21,7 +23,7 @@ module R2RDF
21
23
  dataset_name = File.basename(file).split('.').first
22
24
  end
23
25
  end
24
-
26
+
25
27
 
26
28
  categories = ::CSV.read(file)[0]
27
29
 
@@ -31,9 +33,9 @@ module R2RDF
31
33
  end
32
34
 
33
35
  unless options[:measures] || !interactive
34
- meas = categories - ((options[:dimensions] || []) | [categories[0]])
36
+ meas = categories - (options[:dimensions] || [categories[0]])
35
37
  selection = interact("Measures?",meas,meas){|s| nil}
36
- options[:measures] = Array(selection) unless options[:measures] == nil
38
+ options[:measures] = Array(selection) unless selection == nil
37
39
  end
38
40
 
39
41
  generate_n3(file,dataset_name,options)
@@ -76,7 +78,7 @@ module R2RDF
76
78
  }
77
79
  tmp = @data.dup
78
80
  tmp.shift
79
-
81
+
80
82
  tmp.map{|row|
81
83
  row.each_with_index{|entry,i|
82
84
  obs[@data[0][i]] << entry
@@ -1,17 +1,18 @@
1
1
  module R2RDF
2
2
  module Reader
3
3
  class Dataframe
4
- include R2RDF::Dataset::DataCube
5
-
4
+ include R2RDF::Dataset::DataCube
5
+ include R2RDF::Reader::Output
6
+
6
7
  # def initialize(var)
7
8
  # @var = var
8
9
  # end
9
-
10
+
10
11
  def generate_n3(rexp, var, options={})
11
12
  @rexp = rexp
12
- @options = options
13
-
14
- generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
13
+ options[:type] ||= :string
14
+ @options = options
15
+ output(generate(measures, dimensions, codes, observation_data, observation_labels, var, options), options)
15
16
  end
16
17
 
17
18
  def dimensions
@@ -21,7 +22,7 @@ module R2RDF
21
22
  [@options[:row_label]]
22
23
  else
23
24
  ["refRow"]
24
- end
25
+ end
25
26
  end
26
27
 
27
28
  def codes
@@ -31,7 +32,7 @@ module R2RDF
31
32
  [@options[:row_label]]
32
33
  else
33
34
  ["refRow"]
34
- end
35
+ end
35
36
  end
36
37
 
37
38
  def measures