bio-publisci 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +20 -0
- data/README.md +47 -0
- data/README.rdoc +48 -0
- data/Rakefile +70 -0
- data/bin/bio-publisci +83 -0
- data/features/create_generator.feature +25 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +60 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +9 -0
- data/features/writer_steps.rb +17 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/data_cube.rb +308 -0
- data/lib/bio-publisci/dataset/interactive.rb +57 -0
- data/lib/bio-publisci/loader.rb +36 -0
- data/lib/bio-publisci/metadata/metadata.rb +105 -0
- data/lib/bio-publisci/parser.rb +64 -0
- data/lib/bio-publisci/query/query_helper.rb +114 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +87 -0
- data/lib/bio-publisci/readers/big_cross.rb +119 -0
- data/lib/bio-publisci/readers/cross.rb +72 -0
- data/lib/bio-publisci/readers/csv.rb +54 -0
- data/lib/bio-publisci/readers/dataframe.rb +66 -0
- data/lib/bio-publisci/readers/r_matrix.rb +152 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +66 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci.rb +36 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/template_bak.rb +12 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/queries/codes.rq +13 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +7 -0
- data/resources/queries/measures.rq +7 -0
- data/resources/queries/observations.rq +12 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bio-publisci_spec.rb +7 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +166 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/r_matrix_spec.rb +35 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +149 -0
- data/spec/turtle/reference +2066 -0
- metadata +259 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
# This is temporary, just to help w/ development so I don't have to rewrite r2rdf.rb to be
|
2
|
+
# a standard gem base yet. Also loads the files instead of requiring them, for easy reloading
|
3
|
+
require 'tempfile'
|
4
|
+
require 'rdf'
|
5
|
+
require 'csv'
|
6
|
+
require 'rserve'
|
7
|
+
require 'sparql'
|
8
|
+
require 'sparql/client'
|
9
|
+
require 'rdf/turtle'
|
10
|
+
|
11
|
+
# Loads every Ruby source file found directly inside +folder+.
#
# folder - path of the folder, relative to the directory containing this file.
#
# Files are read with Kernel#load (not require), so calling this again re-reads
# them. Only regular "*.rb" files are loaded, in sorted order: sub-directories
# and stray non-Ruby files are skipped instead of aborting the load, and the
# load order no longer depends on how the filesystem lists the entries.
def load_folder(folder)
  dir = File.join(File.dirname(__FILE__), folder)
  Dir.entries(dir).sort.each do |entry|
    next unless entry.end_with?('.rb')
    path = File.join(dir, entry)
    load path if File.file?(path)
  end
end
|
18
|
+
|
19
|
+
# Individual library files, loaded (not required) in this fixed order.
%w[
  dataset/interactive
  query/query_helper
  parser
  r_client
  analyzer
  store
  dataset/data_cube
].each do |name|
  load File.dirname(__FILE__) + "/#{name}.rb"
end

# Folders whose files are all loaded through load_folder.
['metadata', 'readers', 'writers', 'dataset/ORM'].each do |folder|
  load_folder(folder)
end
|
32
|
+
# Dir.foreach(File.dirname(__FILE__) + '/generators') do |file|
|
33
|
+
# unless file == "." or file == ".."
|
34
|
+
# load File.dirname(__FILE__) + '/generators/' + file
|
35
|
+
# end
|
36
|
+
# end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
class String
  # Returns a copy of the string in which the whitespace that opens the string
  # has been removed from the start of every line that begins with it.
  # Used to write indented heredocs whose content should start at column 0.
  def unindent
    leading = self[/\A\s*/]
    gsub(/^#{leading}/, '')
  end
end
|
6
|
+
|
7
|
+
module R2RDF
|
8
|
+
module Metadata
|
9
|
+
# Default option values for the metadata generators.
# Returns a new Hash on every call.
def defaults
  {
    :encode_nulls => false,
    :base_url => "http://www.rqtl.org"
  }
end
|
15
|
+
|
16
|
+
# Builds Turtle describing a dataset with basic Dublin Core / RDFS metadata.
#
# fields  - Hash of metadata. Keys read: :var, :title, :creator, :description,
#           :date, :subject (collection of strings written verbatim as the
#           objects of dct:subject) and :publishers (collection of hashes
#           with :uri and :label). fields[:var] is replaced in place by its
#           sanitized form, exactly as before.
# options - accepted for interface compatibility; not consulted here.
#
# Returns a String: the dataset statement, a blank line, then one
# org:Organization description per publisher.
# Raises RuntimeError if a publisher has no :uri or no :label.
def basic(fields, options={} )
  #TODO don't assume base dataset is "ns:dataset-var",
  #make it just "var", and try to make that clear to calling classes

  fields[:var] = sanitize([fields[:var]]).first

  str = ""
  str << "ns:dataset-#{fields[:var]} rdfs:label \"#{fields[:title]}\";\n"
  str << "dct:title \"#{fields[:title]}\";\n"
  str << "dct:creator \"#{fields[:creator]}\";\n"
  str << "rdfs:comment \"#{fields[:description]}\";\n"
  str << "dct:description \"#{fields[:description]}\";\n"
  str << "dct:issued \"#{fields[:date]}\"^^xsd:date;\n"

  end_str = ""

  if fields[:subject] && fields[:subject].size > 0
    str << "\tdct:subject \n"
    fields[:subject].each{|subject| str << "\t\t" + subject + ",\n" }
    # turn the comma after the last subject into the predicate separator
    str[-2] = ";"
  end

  if fields[:publishers]
    fields[:publishers].each{|publisher|
      raise "No URI for publisher #{publisher}" unless publisher[:uri]
      raise "No label for publisher #{publisher}" unless publisher[:label]
      str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
      end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
    }
  end

  # Every line written so far ends in ";\n". Close the statement with "." in
  # all cases; it used to stay unterminated when no publishers were given.
  str[-2] = '.'

  str + "\n" + end_str
end
|
51
|
+
|
52
|
+
# Builds Turtle PROV statements for a dataset.
#
# fields  - Hash; reads :var (dataset name, passed through sanitize) and
#           :software (a software name, or a collection of names).
# options - Hash; :base_url overrides the value supplied by #defaults.
#
# Returns a String: the prov:Entity declarations followed by the
# wasGeneratedBy / wasDerivedFrom statements.
#
# Fixes relative to the earlier version: the built text is now returned (the
# method used to return the result of its last `if`), the predicate is spelled
# prov:wasGeneratedBy, each statement ends with ".", the default base URL is
# applied, and the dataset is named with the "ns:" prefix as in #basic.
def provenance(fields, options={})
  #TODO: should either add a prefixes method or replace some with full URIs
  options = defaults.merge(options)
  base_url = options[:base_url]
  var = sanitize([fields[:var]]).first
  source_software = fields[:software] # software name, object type, optionally steps list for, eg, R

  str = "ns:dataset-#{var} a prov:Entity .\n"
  endstr = "ns:dataset-#{var} prov:wasGeneratedBy <#{base_url}/ns/R2RDF> .\n" #replace once gem has an actual name

  if source_software
    source_software = [source_software] unless source_software.respond_to? :map
    source_software.each{|soft|
      str << "<#{base_url}/ns/prov/software/#{soft}> a prov:Entity .\n"

      #Note: probably should say derived from the software object, then software object from software.
      endstr << "ns:dataset-#{var} prov:wasDerivedFrom <#{base_url}/ns/prov/#{soft}> .\n"
    }
  end

  str + endstr
end
|
69
|
+
|
70
|
+
# Turtle description of the R2RDF toolkit itself as a foaf:Agent.
#
# options - Hash; :base_url overrides the value supplied by #defaults.
#
# Returns a String. The earlier version could not run: it invoked an undefined
# `str` method and read an undefined `options`; it also left the statement
# without its closing ".".
def r2rdf_metadata(options={})
  base_url = defaults.merge(options)[:base_url]
  "<#{base_url}/ns/R2RDF> a foaf:Agent;\n" \
  "foaf:name \"R2RDF Semantic Web Toolkit\";\n" \
  "org:memberOf <http://sciruby.com/> .\n"
end
|
77
|
+
|
78
|
+
# Turtle description of the SciRuby organization.
#
# Returns a String. The earlier version passed the heredoc to an undefined
# `str` method and therefore always raised.
#
# NOTE(review): "rdfs:description" is kept as written; confirm whether
# rdfs:comment or dct:description was intended.
def org_metadata
  "<http://sciruby.com/> a org:Organization;\n" \
  "skos:prefLabel \"SciRuby\";\n" \
  "rdfs:description \"A Project to Build and Improve Tools for Scientific Computing in Ruby\".\n"
end
|
85
|
+
|
86
|
+
# Prints help about the supported metadata fields to standard output.
#
# topic - when given, a placeholder message naming the topic is printed;
#         otherwise the table of available fields is printed.
#
# Returns nil.
def metadata_help(topic=nil)
  unless topic
    listing = <<-EOF.unindent
    Available metadata fields:
    (Field) (Ontology) (Description)

    publishers dct/foaf/org The Organization/s responsible for publishing the dataset
    subject dct The subject of this dataset. Use resources when possible
    var dct The name of the datset resource (used internally)
    creator dct The person or process responsible for creating the dataset
    description dct/rdfs A descriptions of the dataset
    issued dct The date of issuance for the dataset

    EOF
    puts listing
    return
  end

  puts "This should display help information for #{topic}, but there's none here yet :("
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module R2RDF
|
2
|
+
module Parser
|
3
|
+
# Parses a Turtle document held in a String into an RDF graph.
#
# string - Turtle text.
#
# The text is written to a temporary file which is handed to RDF::Graph.load
# with :format => :ttl. The temporary file is always closed and removed, even
# if writing or parsing raises (previously it was leaked in that case).
#
# Returns whatever RDF::Graph.load returns.
def create_graph(string)
  f = Tempfile.new('graph')
  begin
    f.write(string)
    f.close
    RDF::Graph.load(f.path, :format => :ttl)
  ensure
    f.close unless f.closed?
    f.unlink
  end
end
|
11
|
+
|
12
|
+
# Converts query results into an array of arrays of converted values.
#
# query_results - collection of solutions; each solution's #to_a yields
#                 entries whose last element is the bound value.
# method        - name of the conversion to call on each value; values that
#                 do not respond to it are converted with to_s instead.
#
# Returns one inner Array per solution.
def get_ary(query_results,method='to_s')
  query_results.map do |solution|
    solution.to_a.map do |entry|
      value = entry.last
      value.respond_to?(method) ? value.send(method) : value.to_s
    end
  end
end
|
23
|
+
|
24
|
+
# Converts query results into an array of hashes, one per solution.
#
# query_results - collection of solutions, each enumerating [name, value] pairs.
# method        - optional conversion; applied to every value that responds
#                 to it, other values are stored unchanged.
#
# Returns an Array of Hashes keyed by binding name.
def get_hashes(query_results,method=nil)
  query_results.map do |solution|
    solution.each_with_object({}) do |element, row|
      value = element[1]
      row[element[0]] = (method && value.respond_to?(method)) ? value.send(method) : value
    end
  end
end
|
39
|
+
|
40
|
+
# Groups query results into { observation => { property => value } }.
#
# query_results - collection of solutions indexable by :observation,
#                 :property and :value; all three are stored as strings.
# shorten_uris  - when true, every key and value is passed through strip_uri.
# method        - accepted but not used.
#
# Returns a Hash of Hashes.
def observation_hash(query_results,shorten_uris=false,method='to_s')
  by_observation = {}
  query_results.each do |sol|
    properties = (by_observation[sol[:observation].to_s] ||= {})
    properties[sol[:property].to_s] = sol[:value].to_s
  end
  return by_observation unless shorten_uris

  by_observation.each_with_object({}) do |(observation, properties), shortened|
    entry = (shortened[strip_uri(observation)] ||= {})
    properties.each do |property, value|
      entry[strip_uri(property)] = strip_uri(value)
    end
  end
end
|
59
|
+
|
60
|
+
# Returns the last path segment of a URI, or its fragment when that segment
# contains a "#".
#
# uri - anything responding to to_s.
#
# Returns a String. An empty or nil URI (or one with nothing after the last
# "/" or "#") yields "" instead of raising NoMethodError or returning nil.
def strip_uri(uri)
  last_segment = uri.to_s.split('/').last
  return "" if last_segment.nil?
  last_segment.split('#').last.to_s
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module RDF
  class Query
    class Solutions
      # Converts the solutions into an Array of plain Hashes, one per
      # solution, mapping each element's first item to its second.
      def to_h
        map do |solution|
          solution.each_with_object({}) do |element, row|
            row[element[0]] = element[1]
          end
        end
      end
    end
  end
end
|
18
|
+
|
19
|
+
module R2RDF
|
20
|
+
#.gsub(/^\s+/,'')
|
21
|
+
module Query
|
22
|
+
# Vocabularies used when building queries, keyed by prefix.
#
# Returns a Hash of prefix Symbol => RDF::Vocabulary.
#
# The :base namespace used to be created from '<http://www.rqtl.org/ns/#>',
# i.e. with Turtle's angle brackets embedded in the URI; they are removed.
# NOTE(review): :cs has no trailing "/" although the commented-out PREFIX list
# in this file writes <http://www.rqtl.org/dc/cs/> - confirm which is intended.
def vocabulary
  {
    base: RDF::Vocabulary.new('http://www.rqtl.org/ns/#'),
    qb: RDF::Vocabulary.new("http://purl.org/linked-data/cube#"),
    rdf: RDF::Vocabulary.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    rdfs: RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#'),
    prop: RDF::Vocabulary.new('http://www.rqtl.org/dc/properties/'),
    cs: RDF::Vocabulary.new('http://www.rqtl.org/dc/cs')
  }
end
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
# def execute_internal(query,repo)
|
36
|
+
# SPARQL.execute(query,repo)
|
37
|
+
# end
|
38
|
+
|
39
|
+
# Runs a SPARQL query against a store.
#
# string - the SPARQL query text.
# store  - an RDF::Graph / RDF::Repository (queried directly), an
#          R2RDF::Store (its url + "/sparql/" is used), or, for
#          type :fourstore, a base URL String.
# type   - :fourstore (default) or :graph; :graph passes +store+ to the
#          client unchanged.
#
# Returns the result of SPARQL::Client#query.
# Raises ArgumentError when the store/type combination is not supported
# (this used to surface as NoMethodError on nil).
def execute(string,store,type=:fourstore)
  target =
    if type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
      store
    elsif store.is_a? R2RDF::Store
      store.url+"/sparql/"
    elsif type == :fourstore
      store+"/sparql/"
    else
      raise ArgumentError, "cannot query a #{store.class} with store type #{type.inspect}"
    end

  SPARQL::Client.new(target).query(string)
end
|
49
|
+
|
50
|
+
# Reads a SPARQL query from a file and runs it through #execute.
#
# file  - path of the query file. If no such file exists, the name is looked
#         up under resources/queries (relative to this source file), first
#         as given and then with ".rq" appended.
# store - passed to #execute.
# type  - passed to #execute.
#
# Raises RuntimeError when no candidate file exists.
def execute_from_file(file,store,type=:fourstore)
  path =
    if File.exist?(file)
      file
    else
      bundled = File.dirname(__FILE__) + '/../../../resources/queries/' + file
      [bundled, bundled + '.rq'].find { |candidate| File.exist?(candidate) }
    end

  raise "couldn't find query for #{file}" unless path
  execute(IO.read(path), store, type)
end
|
62
|
+
|
63
|
+
# PREFIX declarations placed in front of the queries built below.
#
# This restores the previously commented-out prefix list, which the query
# builders need: they called a zero-argument `prefixes` that this module does
# not define. It is named query_prefixes so it cannot clash with the
# prefixes(var, options) method that reader classes in this code base call.
#
# Returns a new String on every call.
def query_prefixes
  <<-EOF
    PREFIX ns: <http://www.rqtl.org/ns/#>
    PREFIX qb: <http://purl.org/linked-data/cube#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX prop: <http://www.rqtl.org/dc/properties/>
    PREFIX cs: <http://www.rqtl.org/dc/cs/>
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

  EOF
end

# Builds a query selecting every value of one property in a dataset.
#
# var      - dataset name (used as ns:dataset-<var>).
# property - property name (used as prop:<property>).
#
# Returns the query as a String.
def property_values(var, property)
  query_prefixes + <<-EOS
    SELECT ?val WHERE {
      ?obs qb:dataSet ns:dataset-#{var} ;
           prop:#{property} ?val .
    }
  EOS
end

# Builds a query selecting the skos:prefLabel of every row referenced through
# prop:refRow by the observations of dataset +var+.
#
# Returns the query as a String.
def row_names(var)
  query_prefixes + <<-EOS
    SELECT ?label WHERE {
      ?obs qb:dataSet ns:dataset-#{var} ;
           prop:refRow ?row .
      ?row skos:prefLabel ?label .
    }
  EOS
end

# Builds a query selecting the distinct rdfs:label of each component of the
# data structure definition ns:dsd-<var>.
#
# Currently will say "___ Component", needs further parsing
#
# Returns the query as a String.
def property_names(var)
  query_prefixes + <<-EOS
    SELECT DISTINCT ?label WHERE {
      ns:dsd-#{var} qb:component ?c .
      ?c rdfs:label ?label
    }
  EOS
end
|
108
|
+
|
109
|
+
end
|
110
|
+
|
111
|
+
# Plain class that mixes in R2RDF::Query, so the query methods can be called
# on an instance.
class QueryHelper
  include R2RDF::Query
end
|
114
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module R2RDF
|
2
|
+
# Helpers for talking to R through an Rserve connection.
module Rconnect

  # Opens an Rserve connection.
  #
  # address - optional argument forwarded to Rserve::Connection.new; when
  #           omitted the connection is created with no arguments.
  def connect(address=nil)
    if address
      Rserve::Connection.new(address)
    else
      Rserve::Connection.new
    end
  end

  # Loads a saved workspace into the R session behind +connection+.
  #
  # connection - object responding to eval(String).
  # loc        - directory containing the workspace (default: Dir.home).
  # file       - workspace file name (default: ".RData").
  #
  # Double quotes and backslashes in the path are escaped before it is placed
  # inside the R string literal; previously such a path produced broken (or
  # unintended) R code.
  #
  # Returns the result of connection.eval.
  def load_workspace(connection,loc=Dir.home,file=".RData")
    path = File.join(loc,file)
    escaped = path.gsub(/["\\]/) { |char| "\\#{char}" }
    connection.eval "load(\"#{escaped}\")"
  end

  # Evaluates +instruction+ on +connection+ and returns the result.
  def get(connection, instruction)
    connection.eval instruction
  end

  # Evaluates R's ls() on +connection+ and returns the result.
  def get_vars(connection)
    connection.eval("ls()")
  end

end
|
26
|
+
|
27
|
+
# Holds one Rserve connection together with the directory of the workspace
# that is loaded into it.
class Client
  include R2RDF::Rconnect
  attr_reader :R

  # auto - when true, the workspace is loaded right away and the payload of
  #        the variable listing is printed.
  # loc  - directory containing the ".RData" workspace (default: Dir.home).
  def initialize(auto=true, loc=Dir.home)
    @R = connect
    @loc = loc
    return unless auto

    load_ws
    puts "vars: #{vars.payload}"
  end

  # Loads the workspace found in the configured directory.
  def load_ws
    load_workspace(@R, @loc)
  end

  # Evaluates +var+ in the R session and returns the result.
  def get_var(var)
    get(@R,var)
  end

  # Path of the workspace file inside the configured directory.
  def get_ws
    @loc.to_s + "/.RData"
  end

  # Result of listing the variables of the R session.
  def vars
    get_vars(@R)
  end
end
|
54
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module R2RDF
|
2
|
+
module Reader
|
3
|
+
class ARFF
|
4
|
+
include R2RDF::Dataset::DataCube
|
5
|
+
# Converts ARFF input by handing its parts to #generate.
#
# arff    - ARFF text, or the path of an existing file containing it.
# options - Hash forwarded to #generate with :no_labels forced to true.
#           The caller's hash is no longer modified; a merged copy is used
#           (and stored in @options).
#
# Attributes that declare a list of nominal values are passed as both the
# dimensions and the codes; all remaining attributes are passed as measures.
# Observations are labelled 1..n, where n is the length of the first column.
#
# Returns the result of #generate.
def generate_n3(arff, options={})
  arff = IO.read(arff) if File.exist? arff
  options = options.merge(no_labels: true)
  @options = options

  comps = components(arff)
  obs = data(arff, comps.keys)
  coded, uncoded = comps.keys.partition{|name| comps[name][:codes]}
  labels = (1..obs.first[1].size).to_a

  # dimensions and codes get separate arrays, as before
  generate(uncoded, coded, coded.dup, obs, labels, relation(arff), options)
end
|
14
|
+
|
15
|
+
# Extracts the relation name from ARFF text.
#
# arff - ARFF text.
#
# Returns the last whitespace-separated token of the first "@relation ..."
# match (case-insensitive).
# Raises ArgumentError when the text has no @relation declaration (this used
# to surface as NoMethodError on nil).
def relation(arff)
  declaration = arff[/@relation.+/i]
  raise ArgumentError, "no @relation declaration found in ARFF input" if declaration.nil?
  declaration.split.last
end
|
18
|
+
|
19
|
+
# Parses the @attribute declarations of ARFF text.
#
# arff - ARFF text.
#
# Returns a Hash keyed by attribute name, in declaration order. Attributes
# declared with a {...} value list map to {type: :coded, codes: [...]};
# all others map to {type: "<declared type>"}.
#
# Nominal values are now stripped of surrounding whitespace ("{a, b}" gives
# ["a", "b"]), and a name written directly against the brace ("x{a,b}") no
# longer includes the value list.
def components(arff)
  #still needs support for quoted strings with whitespace
  arff.split("\n").each_with_object({}) do |line, attributes|
    next unless line =~ /^@ATTRIBUTE/i

    if line =~ /\{.*}/
      name = line.match(/\s.*/).to_a.first.strip.split.first[/[^{]+/]
      codes = line.match(/\{.*}/).to_a.first[1..-2].split(',').map(&:strip)
      attributes[name] = {type: :coded, codes: codes}
    else
      _keyword, name, type = line.split
      attributes[name] = {type: type}
    end
  end
end
|
36
|
+
|
37
|
+
# Reads the @data section of ARFF text into columns.
#
# arff       - ARFF text.
# attributes - attribute names, in column order.
#
# Returns a Hash mapping each attribute name to the Array of its values
# (Strings; nil where a row has fewer fields than attributes).
# Raises ArgumentError when the text has no @data line (this used to surface
# as NoMethodError on nil).
#
# Blank lines and "%" comment lines after @data are skipped rather than
# stored as rows of nils, and values are stripped of surrounding whitespace,
# including a trailing carriage return.
def data(arff, attributes)
  lines = arff.split("\n")
  data_marker = lines.index{|line| line =~ /^@DATA/i}
  raise ArgumentError, "no @data section found in ARFF input" if data_marker.nil?

  columns = attributes.each_with_object({}){|attrib, cols| cols[attrib] = []}
  lines[(data_marker + 1)..-1].each do |line|
    line = line.strip
    next if line.empty? || line.start_with?('%')

    values = line.split(',').map(&:strip)
    attributes.each_with_index{|attrib, i| columns[attrib] << values[i]}
  end
  columns
end
|
47
|
+
|
48
|
+
# def coded_dimensions
|
49
|
+
# if @options[:codes]
|
50
|
+
# @options[:codes]
|
51
|
+
# elsif @options[:row_label]
|
52
|
+
# [@options[:row_label]]
|
53
|
+
# else
|
54
|
+
# ["refRow"]
|
55
|
+
# end
|
56
|
+
# end
|
57
|
+
|
58
|
+
# def measures
|
59
|
+
# if @options[:dimensions]
|
60
|
+
# if @options[:measures]
|
61
|
+
# @options[:measures] - @options[:dimensions]
|
62
|
+
# else
|
63
|
+
# # @rexp.payload.names - @options[:dimensions]
|
64
|
+
# end
|
65
|
+
# else
|
66
|
+
# @options[:measures] # || @rexp.payload.names
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
|
70
|
+
# def observation_labels
|
71
|
+
# # row_names = @rexp.attr.payload["row.names"].to_ruby
|
72
|
+
# # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
|
73
|
+
# # row_names
|
74
|
+
# end
|
75
|
+
|
76
|
+
# def observation_data
|
77
|
+
|
78
|
+
# # data = {}
|
79
|
+
# # @rexp.payload.names.map{|name|
|
80
|
+
# # data[name] = @rexp.payload[name].to_ruby
|
81
|
+
# # }
|
82
|
+
# # data[@options[:row_label] || "refRow"] = observation_labels()
|
83
|
+
# # data
|
84
|
+
# end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module R2RDF
|
2
|
+
module Reader
|
3
|
+
class BigCross
|
4
|
+
include R2RDF::Dataset::DataCube
|
5
|
+
|
6
|
+
# Writes a cross object held in an R session to a set of Turtle files.
#
# client       - object whose eval(String) runs R code and returns the result.
# var          - name of the cross variable in the R session.
# outfile_base - path prefix of the output files: "<base>_structure.ttl" gets
#                the result of #structure, and "<base>_<chrom>.ttl" gets the
#                prefixes plus the observations of one chromosome.
# options      - Hash forwarded to the helper methods.
#
# Chromosomes are fixed to "1".."19" and "X". Progress is printed per
# individual.
#
# Changes: files are opened with File.open rather than Kernel#open (which
# treats a path beginning with "|" as a command to run), each chromosome file
# is opened once instead of once per individual, and iteration uses each
# instead of map.
#
# Returns the list of chromosome names.
def generate_n3(client, var, outfile_base, options={})
  meas = measures(client,var,options)
  dim = dimensions(client,var,options)
  code_list = codes(client,var,options)

  #write structure
  File.open(outfile_base+'_structure.ttl','w'){|f| f.write structure(client,var,options)}

  n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
  chromosome_list = (1..19).to_a.map(&:to_s) + ["X"]
  chromosome_list.each{|chrom|
    File.open(outfile_base+"_#{chrom}.ttl",'w'){|f|
      f.write prefixes(var,options)

      #get number of markers per individual
      entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

      #get genotype data for this chromosome
      geno_chr = client.eval("#{var}$geno$'#{chrom}'")

      #write observations
      n_individuals.times{|indi|
        obs_data = observation_data(client,var,chrom.to_s,indi,geno_chr,entries_per_individual,options)
        labels = labels_for(obs_data,chrom.to_s,indi)
        observations(meas,dim,code_list,obs_data,labels,var,options).each{|obs| f.write obs}
        puts "(#{chrom}) #{indi}/#{n_individuals}"
      }
    }
  }
end
|
40
|
+
|
41
|
+
# Builds the structural part of the output: prefixes, the data structure
# definition, the dataset, the component specifications and the measure
# properties, concatenated in that order.
#
# client, var, options - forwarded to #measures and #dimensions; var and
#                        options are also forwarded to the generator methods.
#
# The unused codes lookup was removed and the appends iterate with each.
#
# Returns the String obtained from prefixes(var, options) with the other
# parts appended to it.
def structure(client,var,options={})
  meas = measures(client,var,options)
  dim = dimensions(client,var,options)

  str = prefixes(var,options)
  str << data_structure_definition(meas,var,options)
  str << dataset(var,options)
  component_specifications(meas, dim, var, options).each{ |c| str << c }
  measure_properties(meas,var,options).each{|m| str << m}

  str
end
|
54
|
+
|
55
|
+
# Names of the measures of the cross: its phenotype names (restricted to
# options[:measures] when that is given) followed by "genotype",
# "markerpos" and "marker", without duplicates.
#
# client - object whose eval(String) returns something responding to to_ruby.
# var    - name of the cross variable in the R session.
def measures(client, var, options={})
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  selected = options[:measures] ? (pheno_names & options[:measures]) : pheno_names
  selected | ["genotype","markerpos","marker"]
  # measure_properties(measures,var,options)
end
|
64
|
+
|
65
|
+
# This reader declares no dimensions; the arguments are ignored.
# Returns a new empty Array.
def dimensions(client, var, options={})
  # dimension_properties([""],var)
  Array.new
end

# This reader declares no coded properties; the arguments are ignored.
# Returns a new empty Array.
def codes(client, var, options={})
  Array.new
end
|
73
|
+
|
74
|
+
# Labels for the observations of one individual on one chromosome.
#
# data       - Hash of columns; the length of its first column is the number
#              of observations per individual.
# chr        - chromosome name, used as the label prefix.
# individual - zero-based index of the individual.
#
# Returns Strings "<chr>_<n>", where n continues the numbering from the
# preceding individuals (individual 0 starts at 1).
def labels_for(data,chr,individual,options={})
  per_individual = data.first.last.size
  first_number = per_individual * individual + 1
  last_number = per_individual * (individual + 1)
  (first_number..last_number).map{|number| "#{chr}_#{number}"}
end
|
79
|
+
|
80
|
+
# Collects the observation columns of one individual on one chromosome.
#
# client                 - object whose eval(String) runs R code.
# var                    - name of the cross variable in the R session.
# chr                    - chromosome name.
# row_individ            - zero-based index of the individual.
# geno_chr               - result of evaluating <var>$geno$'<chr>'.
# entries_per_individual - how many times the per-individual values
#                          (individual index, phenotypes) are repeated.
#
# Returns a Hash with the columns "chr", "genotype", "individual", "marker",
# "markerpos" and one column per phenotype name, each a flat Array.
#
# The phenotype names and the phenotype table are now fetched from R once per
# call; previously the names were evaluated twice and the whole table once
# per phenotype.
def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  pheno = client.eval("#{var}$pheno").payload

  data = {}
  ["chr", "genotype", "individual", "marker", "markerpos"].each{|key| data[key] = []}
  pheno_names.each{|name| data[name] = []}

  data["individual"] << (1..entries_per_individual).to_a.fill(row_individ)
  pheno_names.each{|name|
    data[name] << (1..entries_per_individual).to_a.fill(pheno[name].to_ruby[row_individ])
  }

  num_markers = geno_chr.payload.first.to_ruby.column_size
  data["chr"] << (1..num_markers).to_a.fill(chr)
  data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
  data["markerpos"] << geno_chr.payload["map"].to_a

  # each column currently holds one nested array; flatten in place
  data.each_value{|column| column.flatten!}
  data
end
|
111
|
+
|
112
|
+
# Number of individuals in the cross: the length of the first phenotype
# column of <var>$pheno.
#
# client - object whose eval(String) runs R code.
# var    - name of the cross variable in the R session.
def num_individuals(client, var, options={})
  pheno = client.eval("#{var}$pheno")
  first_column = pheno.payload.first
  first_column.to_ruby.size
end
|
115
|
+
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module R2RDF
|
2
|
+
module Reader
|
3
|
+
class Cross
|
4
|
+
include R2RDF::Dataset::DataCube
|
5
|
+
|
6
|
+
# Converts a cross object that has already been fetched from R.
#
# rexp    - the R expression holding the cross; stored in @rexp for the
#           accessor methods of this class.
# var     - dataset name passed to #generate.
# options - Hash stored in @options and passed to #generate.
#
# Returns the result of #generate.
def generate_n3(rexp, var, options={})
  @rexp, @options = rexp, options
  generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
end
|
11
|
+
|
12
|
+
# Names of the dimension properties of a cross.
def dimensions
  %w[individual chr sex marker]
end

# Names of the coded properties of a cross (the same names as #dimensions).
def codes
  %w[individual chr sex marker]
end
|
19
|
+
|
20
|
+
# Names of the measures: every phenotype name of @rexp except "sex",
# followed by "genotype" and "markerpos", without duplicates.
def measures
  phenotype_names = @rexp.payload["pheno"].payload.names
  (phenotype_names - ["sex"]) | ["genotype","markerpos"]
end
|
23
|
+
|
24
|
+
# Labels for all observations: the integers from 1 to
# (sum of the "map" sizes over the geno entries) x (number of individuals).
def observation_labels
  chromosomes = @rexp.payload["geno"].payload.to_ruby
  entries_per_individual = chromosomes.inject(0){|total, chromosome| total + chromosome["map"].size}
  individuals = @rexp.payload["pheno"].payload.first.to_ruby.size
  (1..(entries_per_individual * individuals)).to_a
end
|
32
|
+
|
33
|
+
# Collects every observation column of the cross held in @rexp.
#
# For each individual, the individual index and each phenotype value are
# repeated once per map entry (summed over the geno entries); for each
# chromosome the chromosome name, genotype row, marker names and marker
# positions are appended.
#
# Returns a Hash with the columns "chr", "genotype", "individual", "marker",
# "markerpos" and one column per phenotype name, each a flat Array.
def observation_data
  pheno = @rexp.payload["pheno"].payload
  geno = @rexp.payload["geno"].payload
  n_individuals = pheno.first.to_ruby.size
  entries_per_individual = geno.to_ruby.inject(0){|total, chromosome| total + chromosome["map"].size}

  data = {}
  ["chr", "genotype", "individual", "marker", "markerpos"].each{|key| data[key] = []}
  pheno.names.each{|name| data[name] = []}

  n_individuals.times do |row_individ|
    data["individual"] << Array.new(entries_per_individual, row_individ)
    pheno.names.each do |name|
      data[name] << Array.new(entries_per_individual, pheno[name].to_ruby[row_individ])
    end

    geno.names.each do |chr|
      geno_chr = geno[chr]
      num_markers = geno_chr.payload.first.to_ruby.column_size
      data["chr"] << Array.new(num_markers, chr)
      data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
      data["marker"] << geno_chr.payload["map"].to_ruby.names
      data["markerpos"] << geno_chr.payload["map"].to_a
    end
  end

  # every column is a list of per-individual arrays at this point
  data.each_value{|column| column.flatten!}

  #data["refRow"] = observation_labels()
  data
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|