bio-publisci 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/Rakefile +1 -1
- data/features/orm_steps.rb +4 -4
- data/features/reader.feature +3 -3
- data/features/reader_steps.rb +1 -0
- data/features/writer.feature +7 -2
- data/features/writer_steps.rb +8 -1
- data/lib/bio-publisci.rb +3 -1
- data/lib/bio-publisci/datacube_model.rb +46 -20
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +196 -194
- data/lib/bio-publisci/dataset/ORM/observation.rb +15 -13
- data/lib/bio-publisci/dataset/data_cube.rb +3 -3
- data/lib/bio-publisci/dataset/dataset_for.rb +25 -4
- data/lib/bio-publisci/dsl/dataset_dsl.rb +4 -2
- data/lib/bio-publisci/dsl/dsl.rb +3 -0
- data/lib/bio-publisci/metadata/generator.rb +1 -1
- data/lib/bio-publisci/metadata/metadata_model.rb +27 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +1 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +33 -2
- data/lib/bio-publisci/query/query_helper.rb +5 -1
- data/lib/bio-publisci/readers/arff.rb +2 -40
- data/lib/bio-publisci/readers/dataframe.rb +1 -1
- data/lib/bio-publisci/writers/arff.rb +42 -16
- data/lib/bio-publisci/writers/base.rb +77 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +2 -2
- data/resources/queries/codes.rq +10 -5
- data/resources/queries/dimensions.rq +9 -4
- data/resources/queries/measures.rq +7 -2
- data/resources/queries/observations.rq +5 -4
- data/resources/weather.numeric.arff +26 -21
- data/spec/ORM/data_cube_orm_spec.rb +23 -3
- data/spec/ORM/prov_model_spec.rb +53 -0
- data/spec/dataset_for_spec.rb +21 -0
- data/spec/dsl_spec.rb +5 -2
- data/spec/metadata/metadata_dsl_spec.rb +1 -1
- data/spec/r_builder_spec.rb +2 -2
- data/spec/turtle/bacon +1 -1
- data/spec/turtle/reference +1 -1
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +61 -0
- metadata +66 -28
- checksums.yaml +0 -7
@@ -1,18 +1,20 @@
|
|
1
1
|
module PubliSci
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
module DataSet
|
3
|
+
module ORM
|
4
|
+
class Observation
|
5
|
+
attr_accessor :data
|
6
|
+
def initialize(data={})
|
7
|
+
@data = data
|
8
|
+
end
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
def method_missing(name, args)
|
11
|
+
#get entry of data hash
|
12
|
+
end
|
12
13
|
|
13
|
-
|
14
|
+
def respond_to_missing?(method, *)
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
18
20
|
end
|
@@ -121,7 +121,7 @@ module PubliSci
|
|
121
121
|
base = options[:base_url]
|
122
122
|
<<-EOF.unindent
|
123
123
|
@base <#{base}/ns/dc/> .
|
124
|
-
@prefix ns: <#{base}/ns/dataset/#{var}
|
124
|
+
@prefix ns: <#{base}/ns/dataset/#{var}/> .
|
125
125
|
@prefix qb: <http://purl.org/linked-data/cube#> .
|
126
126
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
127
127
|
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
@@ -342,7 +342,6 @@ module PubliSci
|
|
342
342
|
lists << str
|
343
343
|
}
|
344
344
|
|
345
|
-
|
346
345
|
lists
|
347
346
|
end
|
348
347
|
|
@@ -357,12 +356,13 @@ module PubliSci
|
|
357
356
|
else
|
358
357
|
refcode = code[0]
|
359
358
|
end
|
359
|
+
# puts data[refcode].uniq
|
360
360
|
data[refcode].uniq.each_with_index{|value,i|
|
361
361
|
unless value == nil && !options[:encode_nulls]
|
362
362
|
concepts << <<-EOF.unindent
|
363
363
|
#{to_resource(value,options)} a skos:Concept, #{code[2]};
|
364
364
|
skos:topConceptOf #{code[1]} ;
|
365
|
-
skos:prefLabel "#{strip_uri(
|
365
|
+
skos:prefLabel "#{strip_uri(value)}" ;
|
366
366
|
skos:inScheme #{code[1]} .
|
367
367
|
|
368
368
|
EOF
|
@@ -17,7 +17,7 @@ module PubliSci
|
|
17
17
|
elsif File.basename(object)[0] == '.' && File.basename(object).count('.') == 1
|
18
18
|
extension = File.basename(object)
|
19
19
|
else
|
20
|
-
raise "Can't load file #{object}; type inference not yet implemented"
|
20
|
+
raise "Can't load file #{object}; file type inference not yet implemented"
|
21
21
|
end
|
22
22
|
|
23
23
|
case extension
|
@@ -25,11 +25,17 @@ module PubliSci
|
|
25
25
|
r_object(object, options, ask_on_ambiguous)
|
26
26
|
when /.csv/i
|
27
27
|
PubliSci::Reader::CSV.new.automatic(object,nil,options,ask_on_ambiguous)
|
28
|
+
else
|
29
|
+
false
|
28
30
|
end
|
29
|
-
elsif object =~ %r{
|
30
|
-
self.for(download(object).path, options, ask_on_ambiguous)
|
31
|
+
elsif object =~ %r{htt(p|ps)://.+}
|
32
|
+
self.for(download(object).path, options, ask_on_ambiguous) || RDF::Statement.new(RDF::URI(object), RDF::URI('http://semanticscience.org/resource/hasValue'), IO.read(download(object).path)).to_s
|
33
|
+
# raise res
|
34
|
+
# self.for_remote(object)
|
31
35
|
else
|
32
|
-
raise "Unable to find reader for
|
36
|
+
raise "Unable to find reader for String '#{object}'"
|
37
|
+
# TODO: better handling of missing readers; need this way for raw strings for now
|
38
|
+
# false
|
33
39
|
end
|
34
40
|
elsif object.is_a? Rserve::REXP
|
35
41
|
r_object(object, options, ask_on_ambiguous)
|
@@ -38,6 +44,12 @@ module PubliSci
|
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
47
|
+
# def for_remote
|
48
|
+
# addr = object
|
49
|
+
# tmp = download(object)
|
50
|
+
# self.for(tmp.path) || "#{addr} <http://semanticscience.org/resource/"
|
51
|
+
# end
|
52
|
+
|
41
53
|
def self.download(uri)
|
42
54
|
out = Tempfile.new(uri.split('/').last)
|
43
55
|
out.write open(uri).read
|
@@ -45,6 +57,15 @@ module PubliSci
|
|
45
57
|
out
|
46
58
|
end
|
47
59
|
|
60
|
+
# private
|
61
|
+
# def self.reader_exists?(object)
|
62
|
+
# if object.is_a? String
|
63
|
+
# if File.exist? object
|
64
|
+
|
65
|
+
# elsif
|
66
|
+
# end
|
67
|
+
# end
|
68
|
+
|
48
69
|
def self.r_object(object, options={}, ask_on_ambiguous=true)
|
49
70
|
if object.is_a? String
|
50
71
|
con = Rserve::Connection.new
|
@@ -15,7 +15,7 @@ module PubliSci
|
|
15
15
|
# end
|
16
16
|
|
17
17
|
def object(file=nil)
|
18
|
-
|
18
|
+
add_or_get('object',file)
|
19
19
|
end
|
20
20
|
alias_method :source, :object
|
21
21
|
|
@@ -61,7 +61,9 @@ module PubliSci
|
|
61
61
|
if options
|
62
62
|
opts = opts.merge(options)
|
63
63
|
end
|
64
|
-
|
64
|
+
object().map{|obj|
|
65
|
+
Dataset.for(obj,opts,interact)
|
66
|
+
}.join("\n")
|
65
67
|
end
|
66
68
|
|
67
69
|
private
|
data/lib/bio-publisci/dsl/dsl.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
module PubliSci
|
2
|
+
class Metadata
|
3
|
+
module Model
|
4
|
+
PROV ||= RDF::Vocabulary.new(RDF::URI.new('http://www.w3.org/ns/prov#'))
|
5
|
+
QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
|
6
|
+
DCT ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/dc/terms/'))
|
7
|
+
# dct:title "#{fields[:title]}";
|
8
|
+
# dct:creator "#{fields[:creator]}";
|
9
|
+
# rdfs:comment "#{fields[:description]}";
|
10
|
+
# dct:description "#{fields[:description]}";
|
11
|
+
# dct:issued "#{fields[:date]}"^^xsd:date.
|
12
|
+
|
13
|
+
class Meta < Spira::Base
|
14
|
+
type PROV.Entity
|
15
|
+
type QB.DataSet
|
16
|
+
property :label, predicate: RDF::RDFS.label
|
17
|
+
property :comment, predicate: RDF::RDFS.comment
|
18
|
+
property :description, predicate: DCT.description
|
19
|
+
property :creator, predicate: DCT.creator
|
20
|
+
property :issued, predicate: DCT.issued
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
# rescue LoadError
|
26
|
+
# puts "spira not installed, ORM unavailable"
|
27
|
+
# end
|
@@ -35,6 +35,7 @@ class Prov
|
|
35
35
|
def associated_with(agent=nil, &block)
|
36
36
|
block_list(:associated,:associations,Association,Associations,agent,&block)
|
37
37
|
end
|
38
|
+
alias_method :wasAssociatedWith, :associated_with
|
38
39
|
|
39
40
|
def used(entity=nil, &block)
|
40
41
|
block_list(:use,:usages,Usage,Usages,entity, &block)
|
@@ -23,6 +23,20 @@ module PubliSci
|
|
23
23
|
end
|
24
24
|
}
|
25
25
|
end
|
26
|
+
|
27
|
+
def all_types
|
28
|
+
me = self.subject
|
29
|
+
type_query = RDF::Query.new do
|
30
|
+
pattern [me, RDF.type, :type]
|
31
|
+
end
|
32
|
+
|
33
|
+
type_query.execute(self.class.repository).map{|t| t[:type]}
|
34
|
+
end
|
35
|
+
|
36
|
+
def has_data?
|
37
|
+
all_types.include?('http://purl.org/linked-data/cube#DataSet')
|
38
|
+
end
|
39
|
+
|
26
40
|
end
|
27
41
|
|
28
42
|
class Agent < Spira::Base
|
@@ -31,15 +45,28 @@ module PubliSci
|
|
31
45
|
type PROV.SoftwareAgent
|
32
46
|
type PROV.Person
|
33
47
|
property :label, predicate: RDF::RDFS.label
|
34
|
-
property :wasGeneratedBy, predicate: PROV.wasGeneratedBy
|
35
48
|
property :foaf_name, predicate: RDF::FOAF.name
|
36
49
|
property :foaf_given, predicate: RDF::FOAF.givenName
|
37
|
-
property :name, predicate: PROV.actedOnBehalfOf
|
38
50
|
property :actedOnBehalfOf, predicate: PROV.actedOnBehalfOf
|
39
51
|
|
52
|
+
|
40
53
|
def name
|
41
54
|
foaf_given || foaf_name
|
42
55
|
end
|
56
|
+
|
57
|
+
def name=(name)
|
58
|
+
foaf_given = name
|
59
|
+
foaf_name = name
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
def activities
|
64
|
+
#should do this in a SPARQL query instead
|
65
|
+
Activity.enum_for.map{|act|
|
66
|
+
subj = subject()
|
67
|
+
act if act.wasAssociatedWith.any?{|assoc| assoc == subj}
|
68
|
+
}.reject{|x| x==nil}
|
69
|
+
end
|
43
70
|
end
|
44
71
|
|
45
72
|
class Activity < Spira::Base
|
@@ -56,6 +83,10 @@ module PubliSci
|
|
56
83
|
property :label, predicate: RDF::RDFS.label
|
57
84
|
property :agent, predicate: PROV.agent
|
58
85
|
property :hadPlan, predicate: PROV.hadPlan
|
86
|
+
|
87
|
+
def activity
|
88
|
+
Activity.each.to_a.select{|act| act.qualifiedAssociation.include? self}
|
89
|
+
end
|
59
90
|
end
|
60
91
|
|
61
92
|
class Derivation < Spira::Base
|
@@ -47,7 +47,7 @@ module PubliSci
|
|
47
47
|
sparql.query(string)
|
48
48
|
end
|
49
49
|
|
50
|
-
def execute_from_file(file,store,type=:fourstore)
|
50
|
+
def execute_from_file(file,store,type=:fourstore,substitutions={})
|
51
51
|
if File.exist?(file)
|
52
52
|
string = IO.read(file)
|
53
53
|
elsif File.exist?(File.dirname(__FILE__) + '/../../../resources/queries/' + file)
|
@@ -57,6 +57,10 @@ module PubliSci
|
|
57
57
|
else
|
58
58
|
raise "couldn't find query for #{file}"
|
59
59
|
end
|
60
|
+
|
61
|
+
substitutions.map{|k,v|
|
62
|
+
string = string.gsub(k,v)
|
63
|
+
}
|
60
64
|
execute(string, store, type)
|
61
65
|
end
|
62
66
|
|
@@ -2,14 +2,14 @@ module PubliSci
|
|
2
2
|
module Reader
|
3
3
|
class ARFF
|
4
4
|
include PubliSci::Dataset::DataCube
|
5
|
+
|
5
6
|
def generate_n3(arff, options={})
|
6
7
|
arff = IO.read(arff) if File.exist? arff
|
7
8
|
options[:no_labels] = true # unless options[:no_labels] == nil
|
8
9
|
@options = options
|
9
10
|
comps = components(arff)
|
10
11
|
obs = data(arff, comps.keys)
|
11
|
-
|
12
|
-
|
12
|
+
generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
|
13
13
|
end
|
14
14
|
|
15
15
|
def relation(arff)
|
@@ -44,44 +44,6 @@ module PubliSci
|
|
44
44
|
}
|
45
45
|
h
|
46
46
|
end
|
47
|
-
|
48
|
-
# def coded_dimensions
|
49
|
-
# if @options[:codes]
|
50
|
-
# @options[:codes]
|
51
|
-
# elsif @options[:row_label]
|
52
|
-
# [@options[:row_label]]
|
53
|
-
# else
|
54
|
-
# ["refRow"]
|
55
|
-
# end
|
56
|
-
# end
|
57
|
-
|
58
|
-
# def measures
|
59
|
-
# if @options[:dimensions]
|
60
|
-
# if @options[:measures]
|
61
|
-
# @options[:measures] - @options[:dimensions]
|
62
|
-
# else
|
63
|
-
# # @rexp.payload.names - @options[:dimensions]
|
64
|
-
# end
|
65
|
-
# else
|
66
|
-
# @options[:measures] # || @rexp.payload.names
|
67
|
-
# end
|
68
|
-
# end
|
69
|
-
|
70
|
-
# def observation_labels
|
71
|
-
# # row_names = @rexp.attr.payload["row.names"].to_ruby
|
72
|
-
# # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
|
73
|
-
# # row_names
|
74
|
-
# end
|
75
|
-
|
76
|
-
# def observation_data
|
77
|
-
|
78
|
-
# # data = {}
|
79
|
-
# # @rexp.payload.names.map{|name|
|
80
|
-
# # data[name] = @rexp.payload[name].to_ruby
|
81
|
-
# # }
|
82
|
-
# # data[@options[:row_label] || "refRow"] = observation_labels()
|
83
|
-
# # data
|
84
|
-
# end
|
85
47
|
end
|
86
48
|
end
|
87
49
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module PubliSci
|
2
|
-
module
|
3
|
-
class ARFF
|
4
|
-
include PubliSci::Query
|
5
|
-
include PubliSci::Parser
|
6
|
-
include PubliSci::Analyzer
|
2
|
+
module Writers
|
3
|
+
class ARFF < Base
|
4
|
+
# include PubliSci::Query
|
5
|
+
# include PubliSci::Parser
|
6
|
+
# include PubliSci::Analyzer
|
7
7
|
|
8
8
|
def build_arff(relation, attributes, data, source)
|
9
9
|
str = <<-EOS
|
@@ -31,34 +31,60 @@ EOS
|
|
31
31
|
repo = RDF::Repository.load(turtle_file)
|
32
32
|
puts "loaded #{repo.size} statements into temporary repo" if verbose
|
33
33
|
|
34
|
-
dims =
|
35
|
-
meas =
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
dims = dimensions(repo)
|
35
|
+
meas = measures(repo)
|
36
|
+
data = observations(repo)
|
37
|
+
|
38
|
+
relation = dataSet(repo)
|
39
|
+
codes = codes(repo)
|
40
40
|
|
41
|
-
data = observation_hash(execute_from_file("observations.rq",repo,:graph), true)
|
42
41
|
attributes = {}
|
42
|
+
|
43
43
|
(dims | meas).map{|component|
|
44
|
-
attributes[component
|
44
|
+
attributes[component] = case recommend_range(data.map{|o| o[1][component]})
|
45
45
|
when "xsd:int"
|
46
46
|
"integer"
|
47
47
|
when "xsd:double"
|
48
48
|
"real"
|
49
49
|
when :coded
|
50
50
|
if dims.include? component
|
51
|
-
"{#{codes[component
|
51
|
+
"{#{codes[component].join(', ')}}"
|
52
52
|
else
|
53
53
|
"string"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
}
|
57
|
+
|
57
58
|
build_arff(relation, attributes, data, turtle_file)
|
58
59
|
end
|
59
60
|
|
60
|
-
def from_store(
|
61
|
-
|
61
|
+
def from_store(repo, dataset=nil, title=nil, verbose=false)
|
62
|
+
# data = observation_hash(execute_from_file("observations.rq",repo,:graph,{"%{dataSet}"=>"<#{dataSet}>"}), true)
|
63
|
+
|
64
|
+
dims = dimensions(repo,dataset)
|
65
|
+
meas = measures(repo,dataset)
|
66
|
+
data = observations(repo,dataset)
|
67
|
+
codes = codes(repo,dataset)
|
68
|
+
attributes = {}
|
69
|
+
|
70
|
+
(dims | meas).map{|component|
|
71
|
+
attributes[component] = case recommend_range(data.map{|o| o[1][component]})
|
72
|
+
when "xsd:int"
|
73
|
+
"integer"
|
74
|
+
when "xsd:double"
|
75
|
+
"real"
|
76
|
+
when :coded
|
77
|
+
if dims.include? component
|
78
|
+
"{#{codes[component].join(', ')}}"
|
79
|
+
else
|
80
|
+
"string"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
}
|
84
|
+
|
85
|
+
dataset = dataSet(repo) unless dataset
|
86
|
+
title = dataset unless title
|
87
|
+
build_arff(title,attributes,data,dataset)
|
62
88
|
end
|
63
89
|
end
|
64
90
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Writers
|
3
|
+
class Base
|
4
|
+
include PubliSci::Query
|
5
|
+
include PubliSci::Parser
|
6
|
+
include PubliSci::Analyzer
|
7
|
+
|
8
|
+
def handle_input(input)
|
9
|
+
if input.is_a? String
|
10
|
+
if File.exist? input
|
11
|
+
RDF::Repository.load(input)
|
12
|
+
else
|
13
|
+
raise "UnkownStringInput: #{input}"
|
14
|
+
end
|
15
|
+
elsif input.is_a? RDF::Repository
|
16
|
+
input
|
17
|
+
else
|
18
|
+
raise "UnkownInput: #{input}, #{input.class}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def dimensions(input, data_set=nil, select=:label)
|
23
|
+
repo = handle_input(input)
|
24
|
+
|
25
|
+
if data_set
|
26
|
+
dims = execute_from_file("dimensions.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
|
27
|
+
else
|
28
|
+
dims = execute_from_file("dimensions.rq",repo,:graph)
|
29
|
+
end
|
30
|
+
|
31
|
+
dims.to_h.map{|d| d[select].to_s}
|
32
|
+
end
|
33
|
+
|
34
|
+
def measures(input, data_set=nil, select=:label)
|
35
|
+
repo = handle_input(input)
|
36
|
+
|
37
|
+
if data_set
|
38
|
+
meas = execute_from_file("measures.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
|
39
|
+
else
|
40
|
+
meas = execute_from_file("measures.rq",repo,:graph)
|
41
|
+
end
|
42
|
+
|
43
|
+
meas.to_h.map{|d| d[select].to_s}
|
44
|
+
end
|
45
|
+
|
46
|
+
def observations(input, data_set = nil, shorten_url = true)
|
47
|
+
repo = handle_input(input)
|
48
|
+
|
49
|
+
if data_set
|
50
|
+
obs = execute_from_file("observations.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"})
|
51
|
+
else
|
52
|
+
obs = execute_from_file("observations.rq",repo,:graph)
|
53
|
+
end
|
54
|
+
|
55
|
+
observation_hash(obs,shorten_url)
|
56
|
+
end
|
57
|
+
|
58
|
+
def dataSet(input, select = :label)
|
59
|
+
repo = handle_input(input)
|
60
|
+
|
61
|
+
execute_from_file("dataset.rq",repo,:graph).to_h.first[select].to_s
|
62
|
+
end
|
63
|
+
|
64
|
+
def codes(input, data_set = nil, select = :label)
|
65
|
+
repo = handle_input(input)
|
66
|
+
if data_set
|
67
|
+
codes = execute_from_file("codes.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"}).to_h
|
68
|
+
else
|
69
|
+
codes = execute_from_file("codes.rq",repo,:graph).to_h
|
70
|
+
end
|
71
|
+
codes.map{|c| c.values.map(&:to_s)}.inject({}){|h,el|
|
72
|
+
(h[el.first]||=[]) << el.last; h
|
73
|
+
}
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|