bio-publisci 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/Gemfile +1 -1
  2. data/Rakefile +1 -1
  3. data/features/orm_steps.rb +4 -4
  4. data/features/reader.feature +3 -3
  5. data/features/reader_steps.rb +1 -0
  6. data/features/writer.feature +7 -2
  7. data/features/writer_steps.rb +8 -1
  8. data/lib/bio-publisci.rb +3 -1
  9. data/lib/bio-publisci/datacube_model.rb +46 -20
  10. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +196 -194
  11. data/lib/bio-publisci/dataset/ORM/observation.rb +15 -13
  12. data/lib/bio-publisci/dataset/data_cube.rb +3 -3
  13. data/lib/bio-publisci/dataset/dataset_for.rb +25 -4
  14. data/lib/bio-publisci/dsl/dataset_dsl.rb +4 -2
  15. data/lib/bio-publisci/dsl/dsl.rb +3 -0
  16. data/lib/bio-publisci/metadata/generator.rb +1 -1
  17. data/lib/bio-publisci/metadata/metadata_model.rb +27 -0
  18. data/lib/bio-publisci/metadata/prov/activity.rb +1 -0
  19. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +33 -2
  20. data/lib/bio-publisci/query/query_helper.rb +5 -1
  21. data/lib/bio-publisci/readers/arff.rb +2 -40
  22. data/lib/bio-publisci/readers/dataframe.rb +1 -1
  23. data/lib/bio-publisci/writers/arff.rb +42 -16
  24. data/lib/bio-publisci/writers/base.rb +77 -0
  25. data/lib/bio-publisci/writers/csv.rb +31 -0
  26. data/lib/bio-publisci/writers/dataframe.rb +2 -2
  27. data/resources/queries/codes.rq +10 -5
  28. data/resources/queries/dimensions.rq +9 -4
  29. data/resources/queries/measures.rq +7 -2
  30. data/resources/queries/observations.rq +5 -4
  31. data/resources/weather.numeric.arff +26 -21
  32. data/spec/ORM/data_cube_orm_spec.rb +23 -3
  33. data/spec/ORM/prov_model_spec.rb +53 -0
  34. data/spec/dataset_for_spec.rb +21 -0
  35. data/spec/dsl_spec.rb +5 -2
  36. data/spec/metadata/metadata_dsl_spec.rb +1 -1
  37. data/spec/r_builder_spec.rb +2 -2
  38. data/spec/turtle/bacon +1 -1
  39. data/spec/turtle/reference +1 -1
  40. data/spec/turtle/weather +275 -0
  41. data/spec/writer_spec.rb +61 -0
  42. metadata +66 -28
  43. checksums.yaml +0 -7
@@ -1,18 +1,20 @@
1
1
  module PubliSci
2
- module ORM
3
- class Observation
4
- attr_accessor :data
5
- def initialize(data={})
6
- @data = data
7
- end
2
+ module DataSet
3
+ module ORM
4
+ class Observation
5
+ attr_accessor :data
6
+ def initialize(data={})
7
+ @data = data
8
+ end
8
9
 
9
- def method_missing(name, args)
10
- #get entry of data hash
11
- end
10
+ def method_missing(name, args)
11
+ #get entry of data hash
12
+ end
12
13
 
13
- def respond_to_missing?(method, *)
14
+ def respond_to_missing?(method, *)
14
15
 
15
- end
16
- end
17
- end
16
+ end
17
+ end
18
+ end
19
+ end
18
20
  end
@@ -121,7 +121,7 @@ module PubliSci
121
121
  base = options[:base_url]
122
122
  <<-EOF.unindent
123
123
  @base <#{base}/ns/dc/> .
124
- @prefix ns: <#{base}/ns/dataset/#{var}#> .
124
+ @prefix ns: <#{base}/ns/dataset/#{var}/> .
125
125
  @prefix qb: <http://purl.org/linked-data/cube#> .
126
126
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
127
127
  @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@@ -342,7 +342,6 @@ module PubliSci
342
342
  lists << str
343
343
  }
344
344
 
345
-
346
345
  lists
347
346
  end
348
347
 
@@ -357,12 +356,13 @@ module PubliSci
357
356
  else
358
357
  refcode = code[0]
359
358
  end
359
+ # puts data[refcode].uniq
360
360
  data[refcode].uniq.each_with_index{|value,i|
361
361
  unless value == nil && !options[:encode_nulls]
362
362
  concepts << <<-EOF.unindent
363
363
  #{to_resource(value,options)} a skos:Concept, #{code[2]};
364
364
  skos:topConceptOf #{code[1]} ;
365
- skos:prefLabel "#{strip_uri(data[refcode][i])}" ;
365
+ skos:prefLabel "#{strip_uri(value)}" ;
366
366
  skos:inScheme #{code[1]} .
367
367
 
368
368
  EOF
@@ -17,7 +17,7 @@ module PubliSci
17
17
  elsif File.basename(object)[0] == '.' && File.basename(object).count('.') == 1
18
18
  extension = File.basename(object)
19
19
  else
20
- raise "Can't load file #{object}; type inference not yet implemented"
20
+ raise "Can't load file #{object}; file type inference not yet implemented"
21
21
  end
22
22
 
23
23
  case extension
@@ -25,11 +25,17 @@ module PubliSci
25
25
  r_object(object, options, ask_on_ambiguous)
26
26
  when /.csv/i
27
27
  PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
28
+ else
29
+ false
28
30
  end
29
- elsif object =~ %r{http[s]://.+}
30
- self.for(download(object).path, options, ask_on_ambiguous)
31
+ elsif object =~ %r{htt(p|ps)://.+}
32
+ self.for(download(object).path, options, ask_on_ambiguous) || RDF::Statement.new(RDF::URI(object), RDF::URI('http://semanticscience.org/resource/hasValue'), IO.read(download(object).path)).to_s
33
+ # raise res
34
+ # self.for_remote(object)
31
35
  else
32
- raise "Unable to find reader for File or String #{object}"
36
+ raise "Unable to find reader for String '#{object}'"
37
+ # TODO: better handling of missing readers; need this way for raw strings for now
38
+ # false
33
39
  end
34
40
  elsif object.is_a? Rserve::REXP
35
41
  r_object(object, options, ask_on_ambiguous)
@@ -38,6 +44,12 @@ module PubliSci
38
44
  end
39
45
  end
40
46
 
47
+ # def for_remote
48
+ # addr = object
49
+ # tmp = download(object)
50
+ # self.for(tmp.path) || "#{addr} <http://semanticscience.org/resource/"
51
+ # end
52
+
41
53
  def self.download(uri)
42
54
  out = Tempfile.new(uri.split('/').last)
43
55
  out.write open(uri).read
@@ -45,6 +57,15 @@ module PubliSci
45
57
  out
46
58
  end
47
59
 
60
+ # private
61
+ # def self.reader_exists?(object)
62
+ # if object.is_a? String
63
+ # if File.exist? object
64
+
65
+ # elsif
66
+ # end
67
+ # end
68
+
48
69
  def self.r_object(object, options={}, ask_on_ambiguous=true)
49
70
  if object.is_a? String
50
71
  con = Rserve::Connection.new
@@ -15,7 +15,7 @@ module PubliSci
15
15
  # end
16
16
 
17
17
  def object(file=nil)
18
- set_or_get('object',file)
18
+ add_or_get('object',file)
19
19
  end
20
20
  alias_method :source, :object
21
21
 
@@ -61,7 +61,9 @@ module PubliSci
61
61
  if options
62
62
  opts = opts.merge(options)
63
63
  end
64
- Dataset.for(object,opts,interact)
64
+ object().map{|obj|
65
+ Dataset.for(obj,opts,interact)
66
+ }.join("\n")
65
67
  end
66
68
 
67
69
  private
@@ -2,6 +2,9 @@ module PubliSci
2
2
  module DSL
3
3
  attr_reader :base_url
4
4
 
5
+ class Instance
6
+ include PubliSci::DSL
7
+ end
5
8
  # Use to set base url for whole script; helps when referring to dataset
6
9
  # resources from metadata and
7
10
  def base_url=(url)
@@ -39,7 +39,7 @@ module PubliSci
39
39
  dct:creator "#{fields[:creator]}";
40
40
  rdfs:comment "#{fields[:description]}";
41
41
  dct:description "#{fields[:description]}";
42
- dct:issued "#{fields[:date]}"^^xsd:date;
42
+ dct:issued "#{fields[:date]}"^^xsd:date.
43
43
  EOF
44
44
 
45
45
  end_str = ""
@@ -0,0 +1,27 @@
1
+ module PubliSci
2
+ class Metadata
3
+ module Model
4
+ PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
5
+ QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
6
+ DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
7
+ # dct:title "#{fields[:title]}";
8
+ # dct:creator "#{fields[:creator]}";
9
+ # rdfs:comment "#{fields[:description]}";
10
+ # dct:description "#{fields[:description]}";
11
+ # dct:issued "#{fields[:date]}"^^xsd:date.
12
+
13
+ class Meta < Spira::Base
14
+ type PROV.Entity
15
+ type QB.DataSet
16
+ property :label, predicate: RDF::RDFS.label
17
+ property :comment, predicate: RDF::RDFS.comment
18
+ property :description, predicate: DCT.description
19
+ property :creator, predicate: DCT.creator
20
+ property :issued, predicate: DCT.issued
21
+ end
22
+ end
23
+ end
24
+ end
25
+ # rescue LoadError
26
+ # puts "spira not installed, ORM unavailable"
27
+ # end
@@ -35,6 +35,7 @@ class Prov
35
35
  def associated_with(agent=nil, &block)
36
36
  block_list(:associated,:associations,Association,Associations,agent,&block)
37
37
  end
38
+ alias_method :wasAssociatedWith, :associated_with
38
39
 
39
40
  def used(entity=nil, &block)
40
41
  block_list(:use,:usages,Usage,Usages,entity, &block)
@@ -23,6 +23,20 @@ module PubliSci
23
23
  end
24
24
  }
25
25
  end
26
+
27
+ def all_types
28
+ me = self.subject
29
+ type_query = RDF::Query.new do
30
+ pattern [me, RDF.type, :type]
31
+ end
32
+
33
+ type_query.execute(self.class.repository).map{|t| t[:type]}
34
+ end
35
+
36
+ def has_data?
37
+ all_types.include?('http://purl.org/linked-data/cube#DataSet')
38
+ end
39
+
26
40
  end
27
41
 
28
42
  class Agent < Spira::Base
@@ -31,15 +45,28 @@ module PubliSci
31
45
  type PROV.SoftwareAgent
32
46
  type PROV.Person
33
47
  property :label, predicate: RDF::RDFS.label
34
- property :wasGeneratedBy, predicate: PROV.wasGeneratedBy
35
48
  property :foaf_name, predicate: RDF::FOAF.name
36
49
  property :foaf_given, predicate: RDF::FOAF.givenName
37
- property :name, predicate: PROV.actedOnBehalfOf
38
50
  property :actedOnBehalfOf, predicate: PROV.actedOnBehalfOf
39
51
 
52
+
40
53
  def name
41
54
  foaf_given || foaf_name
42
55
  end
56
+
57
+ def name=(name)
58
+ foaf_given = name
59
+ foaf_name = name
60
+
61
+ end
62
+
63
+ def activities
64
+ #should do this in a SPARQL query instead
65
+ Activity.enum_for.map{|act|
66
+ subj = subject()
67
+ act if act.wasAssociatedWith.any?{|assoc| assoc == subj}
68
+ }.reject{|x| x==nil}
69
+ end
43
70
  end
44
71
 
45
72
  class Activity < Spira::Base
@@ -56,6 +83,10 @@ module PubliSci
56
83
  property :label, predicate: RDF::RDFS.label
57
84
  property :agent, predicate: PROV.agent
58
85
  property :hadPlan, predicate: PROV.hadPlan
86
+
87
+ def activity
88
+ Activity.each.to_a.select{|act| act.qualifiedAssociation.include? self}
89
+ end
59
90
  end
60
91
 
61
92
  class Derivation < Spira::Base
@@ -47,7 +47,7 @@ module PubliSci
47
47
  sparql.query(string)
48
48
  end
49
49
 
50
- def execute_from_file(file,store,type=:fourstore)
50
+ def execute_from_file(file,store,type=:fourstore,substitutions={})
51
51
  if File.exist?(file)
52
52
  string = IO.read(file)
53
53
  elsif File.exist?(File.dirname(__FILE__) + '/../../../resources/queries/' + file)
@@ -57,6 +57,10 @@ module PubliSci
57
57
  else
58
58
  raise "couldn't find query for #{file}"
59
59
  end
60
+
61
+ substitutions.map{|k,v|
62
+ string = string.gsub(k,v)
63
+ }
60
64
  execute(string, store, type)
61
65
  end
62
66
 
@@ -2,14 +2,14 @@ module PubliSci
2
2
  module Reader
3
3
  class ARFF
4
4
  include PubliSci::Dataset::DataCube
5
+
5
6
  def generate_n3(arff, options={})
6
7
  arff = IO.read(arff) if File.exist? arff
7
8
  options[:no_labels] = true # unless options[:no_labels] == nil
8
9
  @options = options
9
10
  comps = components(arff)
10
11
  obs = data(arff, comps.keys)
11
- repl = generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
12
-
12
+ generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
13
13
  end
14
14
 
15
15
  def relation(arff)
@@ -44,44 +44,6 @@ module PubliSci
44
44
  }
45
45
  h
46
46
  end
47
-
48
- # def coded_dimensions
49
- # if @options[:codes]
50
- # @options[:codes]
51
- # elsif @options[:row_label]
52
- # [@options[:row_label]]
53
- # else
54
- # ["refRow"]
55
- # end
56
- # end
57
-
58
- # def measures
59
- # if @options[:dimensions]
60
- # if @options[:measures]
61
- # @options[:measures] - @options[:dimensions]
62
- # else
63
- # # @rexp.payload.names - @options[:dimensions]
64
- # end
65
- # else
66
- # @options[:measures] # || @rexp.payload.names
67
- # end
68
- # end
69
-
70
- # def observation_labels
71
- # # row_names = @rexp.attr.payload["row.names"].to_ruby
72
- # # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
73
- # # row_names
74
- # end
75
-
76
- # def observation_data
77
-
78
- # # data = {}
79
- # # @rexp.payload.names.map{|name|
80
- # # data[name] = @rexp.payload[name].to_ruby
81
- # # }
82
- # # data[@options[:row_label] || "refRow"] = observation_labels()
83
- # # data
84
- # end
85
47
  end
86
48
  end
87
49
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Reader
3
3
  class Dataframe
4
4
  include PubliSci::Dataset::DataCube
5
5
  include PubliSci::Reader::Output
@@ -1,9 +1,9 @@
1
1
  module PubliSci
2
- module Writer
3
- class ARFF
4
- include PubliSci::Query
5
- include PubliSci::Parser
6
- include PubliSci::Analyzer
2
+ module Writers
3
+ class ARFF < Base
4
+ # include PubliSci::Query
5
+ # include PubliSci::Parser
6
+ # include PubliSci::Analyzer
7
7
 
8
8
  def build_arff(relation, attributes, data, source)
9
9
  str = <<-EOS
@@ -31,34 +31,60 @@ EOS
31
31
  repo = RDF::Repository.load(turtle_file)
32
32
  puts "loaded #{repo.size} statements into temporary repo" if verbose
33
33
 
34
- dims = execute_from_file("dimensions.rq",repo,:graph).to_h.map{|d| [d[:dimension].to_s, d[:label].to_s]}
35
- meas = execute_from_file("measures.rq",repo,:graph).to_h.map{|m| [m[:measure].to_s, m[:label].to_s]}
36
- relation = execute_from_file("dataset.rq",repo,:graph).to_h.first[:label].to_s
37
- codes = execute_from_file("codes.rq",repo,:graph).to_h.map{|e| e.values.map(&:to_s)}.inject({}){|h,el|
38
- (h[el.first]||=[]) << el.last; h
39
- }
34
+ dims = dimensions(repo)
35
+ meas = measures(repo)
36
+ data = observations(repo)
37
+
38
+ relation = dataSet(repo)
39
+ codes = codes(repo)
40
40
 
41
- data = observation_hash(execute_from_file("observations.rq",repo,:graph), true)
42
41
  attributes = {}
42
+
43
43
  (dims | meas).map{|component|
44
- attributes[component[1]] = case recommend_range(data.map{|o| o[1][component[1]]})
44
+ attributes[component] = case recommend_range(data.map{|o| o[1][component]})
45
45
  when "xsd:int"
46
46
  "integer"
47
47
  when "xsd:double"
48
48
  "real"
49
49
  when :coded
50
50
  if dims.include? component
51
- "{#{codes[component[1]].join(',')}}"
51
+ "{#{codes[component].join(', ')}}"
52
52
  else
53
53
  "string"
54
54
  end
55
55
  end
56
56
  }
57
+
57
58
  build_arff(relation, attributes, data, turtle_file)
58
59
  end
59
60
 
60
- def from_store(endpoint_url,variable_in=nil, variable_out=nil, verbose=false)
61
- raise "not implemented yet"
61
+ def from_store(repo, dataset=nil, title=nil, verbose=false)
62
+ # data = observation_hash(execute_from_file("observations.rq",repo,:graph,{"%{dataSet}"=>"<#{dataSet}>"}), true)
63
+
64
+ dims = dimensions(repo,dataset)
65
+ meas = measures(repo,dataset)
66
+ data = observations(repo,dataset)
67
+ codes = codes(repo,dataset)
68
+ attributes = {}
69
+
70
+ (dims | meas).map{|component|
71
+ attributes[component] = case recommend_range(data.map{|o| o[1][component]})
72
+ when "xsd:int"
73
+ "integer"
74
+ when "xsd:double"
75
+ "real"
76
+ when :coded
77
+ if dims.include? component
78
+ "{#{codes[component].join(', ')}}"
79
+ else
80
+ "string"
81
+ end
82
+ end
83
+ }
84
+
85
+ dataset = dataSet(repo) unless dataset
86
+ title = dataset unless title
87
+ build_arff(title,attributes,data,dataset)
62
88
  end
63
89
  end
64
90
  end
@@ -0,0 +1,77 @@
1
+ module PubliSci
2
+ module Writers
3
+ class Base
4
+ include PubliSci::Query
5
+ include PubliSci::Parser
6
+ include PubliSci::Analyzer
7
+
8
+ def handle_input(input)
9
+ if input.is_a? String
10
+ if File.exist? input
11
+ RDF::Repository.load(input)
12
+ else
13
+ raise "UnkownStringInput: #{input}"
14
+ end
15
+ elsif input.is_a? RDF::Repository
16
+ input
17
+ else
18
+ raise "UnkownInput: #{input}, #{input.class}"
19
+ end
20
+ end
21
+
22
+ def dimensions(input, data_set=nil, select=:label)
23
+ repo = handle_input(input)
24
+
25
+ if data_set
26
+ dims = execute_from_file("dimensions.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
27
+ else
28
+ dims = execute_from_file("dimensions.rq",repo,:graph)
29
+ end
30
+
31
+ dims.to_h.map{|d| d[select].to_s}
32
+ end
33
+
34
+ def measures(input, data_set=nil, select=:label)
35
+ repo = handle_input(input)
36
+
37
+ if data_set
38
+ meas = execute_from_file("measures.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
39
+ else
40
+ meas = execute_from_file("measures.rq",repo,:graph)
41
+ end
42
+
43
+ meas.to_h.map{|d| d[select].to_s}
44
+ end
45
+
46
+ def observations(input, data_set = nil, shorten_url = true)
47
+ repo = handle_input(input)
48
+
49
+ if data_set
50
+ obs = execute_from_file("observations.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
51
+ else
52
+ obs = execute_from_file("observations.rq",repo,:graph)
53
+ end
54
+
55
+ observation_hash(obs,shorten_url)
56
+ end
57
+
58
+ def dataSet(input, select = :label)
59
+ repo = handle_input(input)
60
+
61
+ execute_from_file("dataset.rq",repo,:graph).to_h.first[select].to_s
62
+ end
63
+
64
+ def codes(input, data_set = nil, select = :label)
65
+ repo = handle_input(input)
66
+ if data_set
67
+ codes = execute_from_file("codes.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"}).to_h
68
+ else
69
+ codes = execute_from_file("codes.rq",repo,:graph).to_h
70
+ end
71
+ codes.map{|c| c.values.map(&:to_s)}.inject({}){|h,el|
72
+ (h[el.first]||=[]) << el.last; h
73
+ }
74
+ end
75
+ end
76
+ end
77
+ end