bio-publisci 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/Gemfile +1 -1
  2. data/Rakefile +1 -1
  3. data/features/orm_steps.rb +4 -4
  4. data/features/reader.feature +3 -3
  5. data/features/reader_steps.rb +1 -0
  6. data/features/writer.feature +7 -2
  7. data/features/writer_steps.rb +8 -1
  8. data/lib/bio-publisci.rb +3 -1
  9. data/lib/bio-publisci/datacube_model.rb +46 -20
  10. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +196 -194
  11. data/lib/bio-publisci/dataset/ORM/observation.rb +15 -13
  12. data/lib/bio-publisci/dataset/data_cube.rb +3 -3
  13. data/lib/bio-publisci/dataset/dataset_for.rb +25 -4
  14. data/lib/bio-publisci/dsl/dataset_dsl.rb +4 -2
  15. data/lib/bio-publisci/dsl/dsl.rb +3 -0
  16. data/lib/bio-publisci/metadata/generator.rb +1 -1
  17. data/lib/bio-publisci/metadata/metadata_model.rb +27 -0
  18. data/lib/bio-publisci/metadata/prov/activity.rb +1 -0
  19. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +33 -2
  20. data/lib/bio-publisci/query/query_helper.rb +5 -1
  21. data/lib/bio-publisci/readers/arff.rb +2 -40
  22. data/lib/bio-publisci/readers/dataframe.rb +1 -1
  23. data/lib/bio-publisci/writers/arff.rb +42 -16
  24. data/lib/bio-publisci/writers/base.rb +77 -0
  25. data/lib/bio-publisci/writers/csv.rb +31 -0
  26. data/lib/bio-publisci/writers/dataframe.rb +2 -2
  27. data/resources/queries/codes.rq +10 -5
  28. data/resources/queries/dimensions.rq +9 -4
  29. data/resources/queries/measures.rq +7 -2
  30. data/resources/queries/observations.rq +5 -4
  31. data/resources/weather.numeric.arff +26 -21
  32. data/spec/ORM/data_cube_orm_spec.rb +23 -3
  33. data/spec/ORM/prov_model_spec.rb +53 -0
  34. data/spec/dataset_for_spec.rb +21 -0
  35. data/spec/dsl_spec.rb +5 -2
  36. data/spec/metadata/metadata_dsl_spec.rb +1 -1
  37. data/spec/r_builder_spec.rb +2 -2
  38. data/spec/turtle/bacon +1 -1
  39. data/spec/turtle/reference +1 -1
  40. data/spec/turtle/weather +275 -0
  41. data/spec/writer_spec.rb +61 -0
  42. metadata +66 -28
  43. checksums.yaml +0 -7
@@ -1,18 +1,20 @@
1
1
  module PubliSci
2
- module ORM
3
- class Observation
4
- attr_accessor :data
5
- def initialize(data={})
6
- @data = data
7
- end
2
+ module DataSet
3
+ module ORM
4
+ class Observation
5
+ attr_accessor :data
6
+ def initialize(data={})
7
+ @data = data
8
+ end
8
9
 
9
- def method_missing(name, args)
10
- #get entry of data hash
11
- end
10
+ def method_missing(name, args)
11
+ #get entry of data hash
12
+ end
12
13
 
13
- def respond_to_missing?(method, *)
14
+ def respond_to_missing?(method, *)
14
15
 
15
- end
16
- end
17
- end
16
+ end
17
+ end
18
+ end
19
+ end
18
20
  end
@@ -121,7 +121,7 @@ module PubliSci
121
121
  base = options[:base_url]
122
122
  <<-EOF.unindent
123
123
  @base <#{base}/ns/dc/> .
124
- @prefix ns: <#{base}/ns/dataset/#{var}#> .
124
+ @prefix ns: <#{base}/ns/dataset/#{var}/> .
125
125
  @prefix qb: <http://purl.org/linked-data/cube#> .
126
126
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
127
127
  @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@@ -342,7 +342,6 @@ module PubliSci
342
342
  lists << str
343
343
  }
344
344
 
345
-
346
345
  lists
347
346
  end
348
347
 
@@ -357,12 +356,13 @@ module PubliSci
357
356
  else
358
357
  refcode = code[0]
359
358
  end
359
+ # puts data[refcode].uniq
360
360
  data[refcode].uniq.each_with_index{|value,i|
361
361
  unless value == nil && !options[:encode_nulls]
362
362
  concepts << <<-EOF.unindent
363
363
  #{to_resource(value,options)} a skos:Concept, #{code[2]};
364
364
  skos:topConceptOf #{code[1]} ;
365
- skos:prefLabel "#{strip_uri(data[refcode][i])}" ;
365
+ skos:prefLabel "#{strip_uri(value)}" ;
366
366
  skos:inScheme #{code[1]} .
367
367
 
368
368
  EOF
@@ -17,7 +17,7 @@ module PubliSci
17
17
  elsif File.basename(object)[0] == '.' && File.basename(object).count('.') == 1
18
18
  extension = File.basename(object)
19
19
  else
20
- raise "Can't load file #{object}; type inference not yet implemented"
20
+ raise "Can't load file #{object}; file type inference not yet implemented"
21
21
  end
22
22
 
23
23
  case extension
@@ -25,11 +25,17 @@ module PubliSci
25
25
  r_object(object, options, ask_on_ambiguous)
26
26
  when /.csv/i
27
27
  PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
28
+ else
29
+ false
28
30
  end
29
- elsif object =~ %r{http[s]://.+}
30
- self.for(download(object).path, options, ask_on_ambiguous)
31
+ elsif object =~ %r{htt(p|ps)://.+}
32
+ self.for(download(object).path, options, ask_on_ambiguous) || RDF::Statement.new(RDF::URI(object), RDF::URI('http://semanticscience.org/resource/hasValue'), IO.read(download(object).path)).to_s
33
+ # raise res
34
+ # self.for_remote(object)
31
35
  else
32
- raise "Unable to find reader for File or String #{object}"
36
+ raise "Unable to find reader for String '#{object}'"
37
+ # TODO: better handling of missing readers; need this way for raw strings for now
38
+ # false
33
39
  end
34
40
  elsif object.is_a? Rserve::REXP
35
41
  r_object(object, options, ask_on_ambiguous)
@@ -38,6 +44,12 @@ module PubliSci
38
44
  end
39
45
  end
40
46
 
47
+ # def for_remote
48
+ # addr = object
49
+ # tmp = download(object)
50
+ # self.for(tmp.path) || "#{addr} <http://semanticscience.org/resource/"
51
+ # end
52
+
41
53
  def self.download(uri)
42
54
  out = Tempfile.new(uri.split('/').last)
43
55
  out.write open(uri).read
@@ -45,6 +57,15 @@ module PubliSci
45
57
  out
46
58
  end
47
59
 
60
+ # private
61
+ # def self.reader_exists?(object)
62
+ # if object.is_a? String
63
+ # if File.exist? object
64
+
65
+ # elsif
66
+ # end
67
+ # end
68
+
48
69
  def self.r_object(object, options={}, ask_on_ambiguous=true)
49
70
  if object.is_a? String
50
71
  con = Rserve::Connection.new
@@ -15,7 +15,7 @@ module PubliSci
15
15
  # end
16
16
 
17
17
  def object(file=nil)
18
- set_or_get('object',file)
18
+ add_or_get('object',file)
19
19
  end
20
20
  alias_method :source, :object
21
21
 
@@ -61,7 +61,9 @@ module PubliSci
61
61
  if options
62
62
  opts = opts.merge(options)
63
63
  end
64
- Dataset.for(object,opts,interact)
64
+ object().map{|obj|
65
+ Dataset.for(obj,opts,interact)
66
+ }.join("\n")
65
67
  end
66
68
 
67
69
  private
@@ -2,6 +2,9 @@ module PubliSci
2
2
  module DSL
3
3
  attr_reader :base_url
4
4
 
5
+ class Instance
6
+ include PubliSci::DSL
7
+ end
5
8
  # Use to set base url for whole script; helps when referring to dataset
6
9
  # resources from metadata and
7
10
  def base_url=(url)
@@ -39,7 +39,7 @@ module PubliSci
39
39
  dct:creator "#{fields[:creator]}";
40
40
  rdfs:comment "#{fields[:description]}";
41
41
  dct:description "#{fields[:description]}";
42
- dct:issued "#{fields[:date]}"^^xsd:date;
42
+ dct:issued "#{fields[:date]}"^^xsd:date.
43
43
  EOF
44
44
 
45
45
  end_str = ""
@@ -0,0 +1,27 @@
1
+ module PubliSci
2
+ class Metadata
3
+ module Model
4
+ PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
5
+ QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
6
+ DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
7
+ # dct:title "#{fields[:title]}";
8
+ # dct:creator "#{fields[:creator]}";
9
+ # rdfs:comment "#{fields[:description]}";
10
+ # dct:description "#{fields[:description]}";
11
+ # dct:issued "#{fields[:date]}"^^xsd:date.
12
+
13
+ class Meta < Spira::Base
14
+ type PROV.Entity
15
+ type QB.DataSet
16
+ property :label, predicate: RDF::RDFS.label
17
+ property :comment, predicate: RDF::RDFS.comment
18
+ property :description, predicate: DCT.description
19
+ property :creator, predicate: DCT.creator
20
+ property :issued, predicate: DCT.issued
21
+ end
22
+ end
23
+ end
24
+ end
25
+ # rescue LoadError
26
+ # puts "spira not installed, ORM unavailable"
27
+ # end
@@ -35,6 +35,7 @@ class Prov
35
35
  def associated_with(agent=nil, &block)
36
36
  block_list(:associated,:associations,Association,Associations,agent,&block)
37
37
  end
38
+ alias_method :wasAssociatedWith, :associated_with
38
39
 
39
40
  def used(entity=nil, &block)
40
41
  block_list(:use,:usages,Usage,Usages,entity, &block)
@@ -23,6 +23,20 @@ module PubliSci
23
23
  end
24
24
  }
25
25
  end
26
+
27
+ def all_types
28
+ me = self.subject
29
+ type_query = RDF::Query.new do
30
+ pattern [me, RDF.type, :type]
31
+ end
32
+
33
+ type_query.execute(self.class.repository).map{|t| t[:type]}
34
+ end
35
+
36
+ def has_data?
37
+ all_types.include?('http://purl.org/linked-data/cube#DataSet')
38
+ end
39
+
26
40
  end
27
41
 
28
42
  class Agent < Spira::Base
@@ -31,15 +45,28 @@ module PubliSci
31
45
  type PROV.SoftwareAgent
32
46
  type PROV.Person
33
47
  property :label, predicate: RDF::RDFS.label
34
- property :wasGeneratedBy, predicate: PROV.wasGeneratedBy
35
48
  property :foaf_name, predicate: RDF::FOAF.name
36
49
  property :foaf_given, predicate: RDF::FOAF.givenName
37
- property :name, predicate: PROV.actedOnBehalfOf
38
50
  property :actedOnBehalfOf, predicate: PROV.actedOnBehalfOf
39
51
 
52
+
40
53
  def name
41
54
  foaf_given || foaf_name
42
55
  end
56
+
57
+ def name=(name)
58
+ foaf_given = name
59
+ foaf_name = name
60
+
61
+ end
62
+
63
+ def activities
64
+ #should do this in a SPARQL query instead
65
+ Activity.enum_for.map{|act|
66
+ subj = subject()
67
+ act if act.wasAssociatedWith.any?{|assoc| assoc == subj}
68
+ }.reject{|x| x==nil}
69
+ end
43
70
  end
44
71
 
45
72
  class Activity < Spira::Base
@@ -56,6 +83,10 @@ module PubliSci
56
83
  property :label, predicate: RDF::RDFS.label
57
84
  property :agent, predicate: PROV.agent
58
85
  property :hadPlan, predicate: PROV.hadPlan
86
+
87
+ def activity
88
+ Activity.each.to_a.select{|act| act.qualifiedAssociation.include? self}
89
+ end
59
90
  end
60
91
 
61
92
  class Derivation < Spira::Base
@@ -47,7 +47,7 @@ module PubliSci
47
47
  sparql.query(string)
48
48
  end
49
49
 
50
- def execute_from_file(file,store,type=:fourstore)
50
+ def execute_from_file(file,store,type=:fourstore,substitutions={})
51
51
  if File.exist?(file)
52
52
  string = IO.read(file)
53
53
  elsif File.exist?(File.dirname(__FILE__) + '/../../../resources/queries/' + file)
@@ -57,6 +57,10 @@ module PubliSci
57
57
  else
58
58
  raise "couldn't find query for #{file}"
59
59
  end
60
+
61
+ substitutions.map{|k,v|
62
+ string = string.gsub(k,v)
63
+ }
60
64
  execute(string, store, type)
61
65
  end
62
66
 
@@ -2,14 +2,14 @@ module PubliSci
2
2
  module Reader
3
3
  class ARFF
4
4
  include PubliSci::Dataset::DataCube
5
+
5
6
  def generate_n3(arff, options={})
6
7
  arff = IO.read(arff) if File.exist? arff
7
8
  options[:no_labels] = true # unless options[:no_labels] == nil
8
9
  @options = options
9
10
  comps = components(arff)
10
11
  obs = data(arff, comps.keys)
11
- repl = generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
12
-
12
+ generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
13
13
  end
14
14
 
15
15
  def relation(arff)
@@ -44,44 +44,6 @@ module PubliSci
44
44
  }
45
45
  h
46
46
  end
47
-
48
- # def coded_dimensions
49
- # if @options[:codes]
50
- # @options[:codes]
51
- # elsif @options[:row_label]
52
- # [@options[:row_label]]
53
- # else
54
- # ["refRow"]
55
- # end
56
- # end
57
-
58
- # def measures
59
- # if @options[:dimensions]
60
- # if @options[:measures]
61
- # @options[:measures] - @options[:dimensions]
62
- # else
63
- # # @rexp.payload.names - @options[:dimensions]
64
- # end
65
- # else
66
- # @options[:measures] # || @rexp.payload.names
67
- # end
68
- # end
69
-
70
- # def observation_labels
71
- # # row_names = @rexp.attr.payload["row.names"].to_ruby
72
- # # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
73
- # # row_names
74
- # end
75
-
76
- # def observation_data
77
-
78
- # # data = {}
79
- # # @rexp.payload.names.map{|name|
80
- # # data[name] = @rexp.payload[name].to_ruby
81
- # # }
82
- # # data[@options[:row_label] || "refRow"] = observation_labels()
83
- # # data
84
- # end
85
47
  end
86
48
  end
87
49
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Reader
3
3
  class Dataframe
4
4
  include PubliSci::Dataset::DataCube
5
5
  include PubliSci::Reader::Output
@@ -1,9 +1,9 @@
1
1
  module PubliSci
2
- module Writer
3
- class ARFF
4
- include PubliSci::Query
5
- include PubliSci::Parser
6
- include PubliSci::Analyzer
2
+ module Writers
3
+ class ARFF < Base
4
+ # include PubliSci::Query
5
+ # include PubliSci::Parser
6
+ # include PubliSci::Analyzer
7
7
 
8
8
  def build_arff(relation, attributes, data, source)
9
9
  str = <<-EOS
@@ -31,34 +31,60 @@ EOS
31
31
  repo = RDF::Repository.load(turtle_file)
32
32
  puts "loaded #{repo.size} statements into temporary repo" if verbose
33
33
 
34
- dims = execute_from_file("dimensions.rq",repo,:graph).to_h.map{|d| [d[:dimension].to_s, d[:label].to_s]}
35
- meas = execute_from_file("measures.rq",repo,:graph).to_h.map{|m| [m[:measure].to_s, m[:label].to_s]}
36
- relation = execute_from_file("dataset.rq",repo,:graph).to_h.first[:label].to_s
37
- codes = execute_from_file("codes.rq",repo,:graph).to_h.map{|e| e.values.map(&:to_s)}.inject({}){|h,el|
38
- (h[el.first]||=[]) << el.last; h
39
- }
34
+ dims = dimensions(repo)
35
+ meas = measures(repo)
36
+ data = observations(repo)
37
+
38
+ relation = dataSet(repo)
39
+ codes = codes(repo)
40
40
 
41
- data = observation_hash(execute_from_file("observations.rq",repo,:graph), true)
42
41
  attributes = {}
42
+
43
43
  (dims | meas).map{|component|
44
- attributes[component[1]] = case recommend_range(data.map{|o| o[1][component[1]]})
44
+ attributes[component] = case recommend_range(data.map{|o| o[1][component]})
45
45
  when "xsd:int"
46
46
  "integer"
47
47
  when "xsd:double"
48
48
  "real"
49
49
  when :coded
50
50
  if dims.include? component
51
- "{#{codes[component[1]].join(',')}}"
51
+ "{#{codes[component].join(', ')}}"
52
52
  else
53
53
  "string"
54
54
  end
55
55
  end
56
56
  }
57
+
57
58
  build_arff(relation, attributes, data, turtle_file)
58
59
  end
59
60
 
60
- def from_store(endpoint_url,variable_in=nil, variable_out=nil, verbose=false)
61
- raise "not implemented yet"
61
+ def from_store(repo, dataset=nil, title=nil, verbose=false)
62
+ # data = observation_hash(execute_from_file("observations.rq",repo,:graph,{"%{dataSet}"=>"<#{dataSet}>"}), true)
63
+
64
+ dims = dimensions(repo,dataset)
65
+ meas = measures(repo,dataset)
66
+ data = observations(repo,dataset)
67
+ codes = codes(repo,dataset)
68
+ attributes = {}
69
+
70
+ (dims | meas).map{|component|
71
+ attributes[component] = case recommend_range(data.map{|o| o[1][component]})
72
+ when "xsd:int"
73
+ "integer"
74
+ when "xsd:double"
75
+ "real"
76
+ when :coded
77
+ if dims.include? component
78
+ "{#{codes[component].join(', ')}}"
79
+ else
80
+ "string"
81
+ end
82
+ end
83
+ }
84
+
85
+ dataset = dataSet(repo) unless dataset
86
+ title = dataset unless title
87
+ build_arff(title,attributes,data,dataset)
62
88
  end
63
89
  end
64
90
  end
@@ -0,0 +1,77 @@
1
+ module PubliSci
2
+ module Writers
3
+ class Base
4
+ include PubliSci::Query
5
+ include PubliSci::Parser
6
+ include PubliSci::Analyzer
7
+
8
+ def handle_input(input)
9
+ if input.is_a? String
10
+ if File.exist? input
11
+ RDF::Repository.load(input)
12
+ else
13
+ raise "UnkownStringInput: #{input}"
14
+ end
15
+ elsif input.is_a? RDF::Repository
16
+ input
17
+ else
18
+ raise "UnkownInput: #{input}, #{input.class}"
19
+ end
20
+ end
21
+
22
+ def dimensions(input, data_set=nil, select=:label)
23
+ repo = handle_input(input)
24
+
25
+ if data_set
26
+ dims = execute_from_file("dimensions.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
27
+ else
28
+ dims = execute_from_file("dimensions.rq",repo,:graph)
29
+ end
30
+
31
+ dims.to_h.map{|d| d[select].to_s}
32
+ end
33
+
34
+ def measures(input, data_set=nil, select=:label)
35
+ repo = handle_input(input)
36
+
37
+ if data_set
38
+ meas = execute_from_file("measures.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
39
+ else
40
+ meas = execute_from_file("measures.rq",repo,:graph)
41
+ end
42
+
43
+ meas.to_h.map{|d| d[select].to_s}
44
+ end
45
+
46
+ def observations(input, data_set = nil, shorten_url = true)
47
+ repo = handle_input(input)
48
+
49
+ if data_set
50
+ obs = execute_from_file("observations.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
51
+ else
52
+ obs = execute_from_file("observations.rq",repo,:graph)
53
+ end
54
+
55
+ observation_hash(obs,shorten_url)
56
+ end
57
+
58
+ def dataSet(input, select = :label)
59
+ repo = handle_input(input)
60
+
61
+ execute_from_file("dataset.rq",repo,:graph).to_h.first[select].to_s
62
+ end
63
+
64
+ def codes(input, data_set = nil, select = :label)
65
+ repo = handle_input(input)
66
+ if data_set
67
+ codes = execute_from_file("codes.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"}).to_h
68
+ else
69
+ codes = execute_from_file("codes.rq",repo,:graph).to_h
70
+ end
71
+ codes.map{|c| c.values.map(&:to_s)}.inject({}){|h,el|
72
+ (h[el.first]||=[]) << el.last; h
73
+ }
74
+ end
75
+ end
76
+ end
77
+ end