bio-publisci 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +20 -0
- data/README.md +47 -0
- data/README.rdoc +48 -0
- data/Rakefile +70 -0
- data/bin/bio-publisci +83 -0
- data/features/create_generator.feature +25 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +60 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +9 -0
- data/features/writer_steps.rb +17 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/data_cube.rb +308 -0
- data/lib/bio-publisci/dataset/interactive.rb +57 -0
- data/lib/bio-publisci/loader.rb +36 -0
- data/lib/bio-publisci/metadata/metadata.rb +105 -0
- data/lib/bio-publisci/parser.rb +64 -0
- data/lib/bio-publisci/query/query_helper.rb +114 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +87 -0
- data/lib/bio-publisci/readers/big_cross.rb +119 -0
- data/lib/bio-publisci/readers/cross.rb +72 -0
- data/lib/bio-publisci/readers/csv.rb +54 -0
- data/lib/bio-publisci/readers/dataframe.rb +66 -0
- data/lib/bio-publisci/readers/r_matrix.rb +152 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +66 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci.rb +36 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/template_bak.rb +12 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/queries/codes.rq +13 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +7 -0
- data/resources/queries/measures.rq +7 -0
- data/resources/queries/observations.rq +12 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bio-publisci_spec.rb +7 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +166 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/r_matrix_spec.rb +35 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +149 -0
- data/spec/turtle/reference +2066 -0
- metadata +259 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
# This is temporary, just to help w/ development so I don't have to rewrite r2rdf.rb to be
|
2
|
+
# a standard gem base yet. Also loads the files instead of requiring them, for easy reloading
|
3
|
+
require 'tempfile'
|
4
|
+
require 'rdf'
|
5
|
+
require 'csv'
|
6
|
+
require 'rserve'
|
7
|
+
require 'sparql'
|
8
|
+
require 'sparql/client'
|
9
|
+
require 'rdf/turtle'
|
10
|
+
|
11
|
+
# Loads (rather than requires, so files can be re-loaded during development)
# every Ruby source file that sits directly inside +folder+.
#
# folder - directory name relative to the directory containing this file.
#
# Only regular files ending in ".rb" are loaded; sub-directories, editor
# backups and other stray files are skipped. Files are loaded in sorted
# order so the load sequence does not depend on the filesystem.
#
# Returns nil.
def load_folder(folder)
  dir = File.join(File.dirname(__FILE__), folder)
  Dir.entries(dir).sort.each do |entry|
    path = File.join(dir, entry)
    load path if File.extname(entry) == '.rb' && File.file?(path)
  end
  nil
end
|
18
|
+
|
19
|
+
# Core library files, loaded one by one in this fixed order.
%w[
  dataset/interactive.rb
  query/query_helper.rb
  parser.rb
  r_client.rb
  analyzer.rb
  store.rb
  dataset/data_cube.rb
].each do |relative_path|
  load File.dirname(__FILE__) + '/' + relative_path
end

# Folders whose contents are loaded wholesale via load_folder.
%w[metadata readers writers dataset/ORM].each do |folder|
  load_folder(folder)
end
|
32
|
+
# Dir.foreach(File.dirname(__FILE__) + '/generators') do |file|
|
33
|
+
# unless file == "." or file == ".."
|
34
|
+
# load File.dirname(__FILE__) + '/generators/' + file
|
35
|
+
# end
|
36
|
+
# end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
class String
  # Removes the leading whitespace found at the very start of the string
  # from the beginning of every line. Intended for indented heredocs:
  # the first line's indentation is treated as the common margin.
  #
  # Returns a new String; the receiver is not modified.
  def unindent
    margin = self[/\A\s*/]
    # Parenthesised call avoids Ruby's "ambiguous first argument" warning;
    # the margin is escaped so it is always matched literally.
    gsub(/^#{Regexp.escape(margin)}/, '')
  end
end
|
6
|
+
|
7
|
+
module R2RDF
  # Mixin that renders dataset-level metadata (descriptive fields,
  # publishers, provenance) as Turtle fragments.
  #
  # The including class must provide +sanitize+ (called here with a
  # one-element array; the first element of its result is used as the
  # dataset identifier), and String#unindent must be loaded.
  module Metadata
    # Default options shared by the metadata generators.
    def defaults
      {
        encode_nulls: false,
        base_url: "http://www.rqtl.org",
      }
    end

    # Builds the descriptive Turtle block for a dataset.
    #
    # fields  - Hash read for :var, :title, :creator, :description, :date,
    #           :subject (list of Turtle terms) and :publishers (list of
    #           hashes with :uri and :label).
    # options - accepted for interface compatibility; not used here.
    #
    # Returns a String: the dataset statement followed by one statement per
    # publisher. Raises RuntimeError when a publisher lacks :uri or :label.
    def basic(fields, options={} )
      #TODO don't assume base dataset is "ns:dataset-var",
      #make it just "var", and try to make that clear to calling classes

      # NOTE: the sanitized identifier is written back into the caller's hash.
      fields[:var] = sanitize([fields[:var]]).first
      str = <<-EOF.unindent
      ns:dataset-#{fields[:var]} rdfs:label "#{fields[:title]}";
        dct:title "#{fields[:title]}";
        dct:creator "#{fields[:creator]}";
        rdfs:comment "#{fields[:description]}";
        dct:description "#{fields[:description]}";
        dct:issued "#{fields[:date]}"^^xsd:date;
      EOF

      end_str = ""

      if fields[:subject] && fields[:subject].size > 0
        str << "\tdct:subject \n"
        fields[:subject].each{|subject| str << "\t\t" + subject + ",\n" }
        # turn the trailing "," of the last subject into ";"
        str[-2] = ";"
      end

      if fields[:publishers]
        fields[:publishers].each{|publisher|
          raise "No URI for publisher #{publisher}" unless publisher[:uri]
          raise "No label for publisher #{publisher}" unless publisher[:label]
          str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
          end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
        }
      end

      # At this point str always ends in ";\n"; close the dataset statement
      # whether or not publishers were supplied.
      str[-2] = '.'

      str + "\n" + end_str
    end

    # Builds provenance statements for a dataset.
    #
    # fields  - Hash read for :var and :software (a single name or a list).
    # options - Hash; :base_url overrides the value from +defaults+.
    #
    # Returns a String of Turtle statements: the entity declarations first,
    # then the generation/derivation links.
    def provenance(fields, options={})
      #TODO: should either add a prefixes method or replace some with full URIs
      options = defaults.merge(options)
      var = sanitize([fields[:var]]).first
      source_software = fields[:software] # software name, object type, optionally steps list for, eg, R

      # The dataset lives in the ns: namespace, as in #basic.
      str = "ns:dataset-#{var} a prov:Entity .\n"
      endstr = "ns:dataset-#{var} prov:wasGeneratedBy <#{options[:base_url]}/ns/R2RDF> .\n" #replace once gem has an actual name
      if source_software
        source_software = [source_software] unless source_software.respond_to? :map
        source_software.each{|soft|
          software_uri = "<#{options[:base_url]}/ns/prov/software/#{soft}>"
          str << "#{software_uri} a prov:Entity .\n"

          #Note: probably should say derived from the software object, then software object from software.
          endstr << "ns:dataset-#{var} prov:wasDerivedFrom #{software_uri} .\n"
        }
      end

      str + endstr
    end

    # Turtle description of the toolkit agent referenced by #provenance.
    #
    # options - Hash; :base_url overrides the value from +defaults+.
    def r2rdf_metadata(options={})
      options = defaults.merge(options)
      <<-EOF.unindent
      <#{options[:base_url]}/ns/R2RDF> a foaf:Agent;
        foaf:name "R2RDF Semantic Web Toolkit";
        org:memberOf <http://sciruby.com/> .
      EOF
    end

    # Turtle description of the SciRuby organization.
    def org_metadata
      <<-EOF.unindent
      <http://sciruby.com/> a org:Organization;
        skos:prefLabel "SciRuby";
        rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".
      EOF
    end

    # Prints the list of supported metadata fields, or a placeholder
    # message when a specific +topic+ is requested.
    def metadata_help(topic=nil)
      if topic
        puts "This should display help information for #{topic}, but there's none here yet :("
      else
        puts <<-EOF.unindent
        Available metadata fields:
        (Field) (Ontology) (Description)

        publishers dct/foaf/org The Organization/s responsible for publishing the dataset
        subject dct The subject of this dataset. Use resources when possible
        var dct The name of the dataset resource (used internally)
        title dct/rdfs The title of the dataset
        creator dct The person or process responsible for creating the dataset
        description dct/rdfs A description of the dataset
        date dct The date of issuance for the dataset

        EOF
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module R2RDF
  # Helpers for loading Turtle into a graph and for reshaping SPARQL
  # query results into plain Ruby arrays and hashes.
  module Parser
    # Parses a Turtle string into an RDF::Graph by way of a temporary file.
    #
    # string - Turtle source text.
    #
    # Returns the loaded graph. The temporary file is removed even when
    # writing or loading raises.
    def create_graph(string)
      f = Tempfile.new('graph')
      begin
        f.write(string)
        f.close
        RDF::Graph.load(f.path, :format => :ttl)
      ensure
        f.close!
      end
    end

    # Converts each solution into an array of its bound values.
    #
    # query_results - enumerable of solutions; each must respond to +to_a+
    #                 and yield [name, value] pairs.
    # method        - name of the method applied to each value when the
    #                 value responds to it; +to_s+ is used otherwise.
    #
    # Returns an Array of Arrays.
    def get_ary(query_results,method='to_s')
      query_results.map do |solution|
        solution.to_a.map do |entry|
          value = entry.last
          value.respond_to?(method) ? value.send(method) : value.to_s
        end
      end
    end

    # Converts each solution into a Hash of binding name => value.
    #
    # method - optional method name applied to every value that responds
    #          to it; values are stored untouched otherwise.
    #
    # Returns an Array of Hashes.
    def get_hashes(query_results,method=nil)
      query_results.map do |solution|
        h = {}
        solution.each do |element|
          value = element[1]
          h[element[0]] = (method && value.respond_to?(method)) ? value.send(method) : value
        end
        h
      end
    end

    # Groups solutions binding :observation, :property and :value into
    # { observation => { property => value } }, with everything as strings.
    #
    # shorten_uris - when true, keys and values are passed through
    #                #strip_uri.
    # method       - accepted for interface compatibility; not used.
    def observation_hash(query_results,shorten_uris=false,method='to_s')
      h = {}
      query_results.each do |sol|
        (h[sol[:observation].to_s] ||= {})[sol[:property].to_s] = sol[:value].to_s
      end
      return h unless shorten_uris

      newh = {}
      h.each do |k, v|
        short = (newh[strip_uri(k)] ||= {})
        v.each { |kk, vv| short[strip_uri(kk)] = strip_uri(vv) }
      end
      newh
    end

    # Returns the part of +uri+ after its last "/" and last "#".
    # Always returns a String; nil or empty input yields "".
    def strip_uri(uri)
      uri.to_s.split('/').last.to_s.split('#').last.to_s
    end
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module RDF
  class Query
    class Solutions
      # Converts the solution sequence into plain Ruby data.
      #
      # Despite the name, this returns an Array with one Hash per solution,
      # each mapping a binding's first element to its second element.
      # NOTE(review): this re-opens a class owned by the rdf gem and
      # replaces any inherited to_h — confirm that is intended.
      def to_h
        map do |solution|
          bindings = {}
          solution.each { |element| bindings[element[0]] = element[1] }
          bindings
        end
      end
    end
  end
end
|
18
|
+
|
19
|
+
module R2RDF
  #.gsub(/^\s+/,'')
  # Mixin for running SPARQL queries against a store and for building a
  # few common queries over generated data cubes.
  module Query
    # Vocabularies keyed by the prefix names used in the bundled queries.
    def vocabulary
      {
        # URIs are given bare: angle brackets are Turtle/SPARQL syntax and
        # must not be part of the vocabulary URI itself.
        base: RDF::Vocabulary.new('http://www.rqtl.org/ns/#'),
        qb: RDF::Vocabulary.new("http://purl.org/linked-data/cube#"),
        rdf: RDF::Vocabulary.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
        rdfs: RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#'),
        prop: RDF::Vocabulary.new('http://www.rqtl.org/dc/properties/'),
        cs: RDF::Vocabulary.new('http://www.rqtl.org/dc/cs/')
      }
    end

    # def execute_internal(query,repo)
    #   SPARQL.execute(query,repo)
    # end

    # Runs a SPARQL query string.
    #
    # string - the query text.
    # store  - an RDF::Graph / RDF::Repository, an R2RDF::Store, or (for
    #          type :fourstore) the base URL of an endpoint as a String.
    # type   - :graph or :fourstore.
    #
    # Returns whatever SPARQL::Client#query returns.
    # Raises ArgumentError when the store/type combination is not handled.
    def execute(string,store,type=:fourstore)
      sparql =
        if type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
          SPARQL::Client.new(store)
        elsif store.is_a? R2RDF::Store
          SPARQL::Client.new(store.url+"/sparql/")
        elsif type == :fourstore
          SPARQL::Client.new(store+"/sparql/")
        else
          raise ArgumentError, "don't know how to query #{store.class} with type #{type.inspect}"
        end
      sparql.query(string)
    end

    # Runs a query read from a file.
    #
    # file - a path to a query file, or the name of a bundled query under
    #        resources/queries (with or without the ".rq" extension).
    #
    # Raises RuntimeError when no matching file is found.
    def execute_from_file(file,store,type=:fourstore)
      bundled = File.dirname(__FILE__) + '/../../../resources/queries/' + file
      path = [file, bundled, bundled + '.rq'].find { |candidate| File.file?(candidate) }
      raise "couldn't find query for #{file}" unless path
      execute(IO.read(path), store, type)
    end

    # Query selecting every value of +property+ over the observations of
    # dataset +var+.
    def property_values(var, property)
      sparql_prefixes + <<-EOS
      SELECT ?val WHERE {
        ?obs qb:dataSet ns:dataset-#{var} ;
          prop:#{property} ?val ;
      }
      EOS
    end

    # Query selecting the row labels of dataset +var+.
    def row_names(var)
      sparql_prefixes + <<-EOS
      SELECT ?label WHERE {
        ?obs qb:dataSet ns:dataset-#{var} ;
          prop:refRow ?row .
        ?row skos:prefLabel ?label .
      }
      EOS
    end

    # Currently will say "___ Component", needs further parsing
    def property_names(var)
      sparql_prefixes + <<-EOS
      SELECT DISTINCT ?label WHERE {
        ns:dsd-#{var} qb:component ?c .
        ?c rdfs:label ?label
      }
      EOS
    end

    private

    # PREFIX declarations for the query builders above. Kept under its own
    # name so it cannot clash with a +prefixes+ method supplied by another
    # mixin; a fresh String is returned on every call.
    def sparql_prefixes
      <<-EOF
      PREFIX ns: <http://www.rqtl.org/ns/#>
      PREFIX qb: <http://purl.org/linked-data/cube#>
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX prop: <http://www.rqtl.org/dc/properties/>
      PREFIX cs: <http://www.rqtl.org/dc/cs/>
      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

      EOF
    end
  end

  # Stand-alone object exposing the Query helpers.
  class QueryHelper
    include R2RDF::Query
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module R2RDF
  # Thin helpers around an Rserve connection.
  module Rconnect

    # Opens an Rserve connection, passing +address+ through when given.
    def connect(address=nil)
      address ? Rserve::Connection.new(address) : Rserve::Connection.new
    end

    # Asks R to load the workspace file +file+ located in +loc+.
    #
    # The path is embedded in a double-quoted R string literal, so
    # backslashes and double quotes are escaped first.
    def load_workspace(connection,loc=Dir.home,file=".RData")
      path = File.join(loc,file)
      escaped = path.gsub('\\') { '\\\\' }.gsub('"') { '\\"' }
      connection.eval "load(\"#{escaped}\")"
    end

    # Evaluates +instruction+ (arbitrary R source) on the connection.
    def get(connection, instruction)
      connection.eval instruction
    end

    # Evaluates R's ls() on the connection.
    def get_vars(connection)
      connection.eval("ls()")
    end

  end

  # Holds one Rserve connection plus the directory of its workspace.
  class Client
    include R2RDF::Rconnect
    attr :R

    # auto - when true, load the workspace and print its variables.
    # loc  - directory containing the .RData file.
    def initialize(auto=true, loc=Dir.home)
      @R = connect
      @loc = loc
      if auto
        load_ws
        puts "vars: #{vars.payload}"
      end
    end

    # Loads the workspace found in the configured directory.
    def load_ws
      load_workspace(@R, @loc)
    end

    # Evaluates +var+ on the connection and returns the result.
    def get_var(var)
      get(@R,var)
    end

    # Path of the workspace file.
    def get_ws
      "#{@loc}/.RData"
    end

    # Result of ls() on the connection.
    def vars
      get_vars(@R)
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Reads an ARFF document and hands its attributes and rows to the
    # DataCube generator.
    class ARFF
      include R2RDF::Dataset::DataCube

      # Generates output for an ARFF document.
      #
      # arff    - ARFF text, or the path of a file containing it.
      # options - generation options; :no_labels is always forced to true.
      #           The caller's hash is left untouched.
      #
      # Attributes declared with a {a,b,...} value list are passed to
      # +generate+ as both dimensions and codes; all others as measures.
      # Returns whatever +generate+ returns.
      def generate_n3(arff, options={})
        arff = IO.read(arff) if File.exist? arff
        options = options.merge(no_labels: true) # unless options[:no_labels] == nil
        @options = options
        comps = components(arff)
        obs = data(arff, comps.keys)
        coded = comps.select{|_name, spec| spec[:codes]}.keys
        uncoded = comps.reject{|_name, spec| spec[:codes]}.keys
        # one label per data row, numbered from 1
        row_count = obs.empty? ? 0 : obs.values.first.size
        generate(uncoded, coded, coded, obs, (1..row_count).to_a, relation(arff), options)
      end

      # Returns the last word of the @relation declaration.
      # Raises ArgumentError when the document has no such declaration.
      def relation(arff)
        declaration = arff[/@relation.+/i]
        raise ArgumentError, "ARFF input has no @relation declaration" unless declaration
        declaration.split.last
      end

      # Parses the @attribute declarations.
      #
      # Returns a Hash of attribute name => {type: ...}; attributes with a
      # {a,b,...} value list get type :coded plus a :codes list (each code
      # stripped of surrounding whitespace).
      def components(arff)
        #still needs support for quoted strings with whitespace
        h = {}
        arff.split("\n").grep(/^@ATTRIBUTE/i).each{|line|
          if line =~ /\{.*}/
            name = line.match(/\s.*/).to_a.first.strip.split.first
            codes = line[/\{.*}/][1..-2].split(',').map(&:strip)
            h[name] = {type: :coded, codes: codes}
          else
            _keyword, name, type = line.split
            h[name] = {type: type}
          end
        }
        h
      end

      # Parses the rows following the @data marker.
      #
      # attributes - attribute names, in column order.
      #
      # Blank lines and "%" comment lines are skipped and each value is
      # stripped of surrounding whitespace.
      # Returns a Hash of attribute name => Array of column values.
      # Raises ArgumentError when the document has no @data marker.
      def data(arff, attributes)
        lines = arff.split("\n")
        marker = lines.index{|line| line =~ /^@DATA/i}
        raise ArgumentError, "ARFF input has no @data section" unless marker
        h = attributes.inject({}){|ha,attrib| ha[attrib] = []; ha}
        lines[(marker + 1)..-1].each{|line|
          line = line.strip
          next if line.empty? || line.start_with?('%')
          values = line.split(',').map(&:strip)
          attributes.each_with_index{|a,i| h[a] << values[i]}
        }
        h
      end

      # def coded_dimensions
      #   if @options[:codes]
      #     @options[:codes]
      #   elsif @options[:row_label]
      #     [@options[:row_label]]
      #   else
      #     ["refRow"]
      #   end
      # end

      # def measures
      #   if @options[:dimensions]
      #     if @options[:measures]
      #       @options[:measures] - @options[:dimensions]
      #     else
      #       # @rexp.payload.names - @options[:dimensions]
      #     end
      #   else
      #     @options[:measures] # || @rexp.payload.names
      #   end
      # end

      # def observation_labels
      #   # row_names = @rexp.attr.payload["row.names"].to_ruby
      #   # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
      #   # row_names
      # end

      # def observation_data

      #   # data = {}
      #   # @rexp.payload.names.map{|name|
      #   #   data[name] = @rexp.payload[name].to_ruby
      #   # }
      #   # data[@options[:row_label] || "refRow"] = observation_labels()
      #   # data
      # end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Writes a cross object held in a live R session out as several Turtle
    # files: one structure file plus one observation file per chromosome.
    class BigCross
      include R2RDF::Dataset::DataCube

      # Writes "<outfile_base>_structure.ttl" and one
      # "<outfile_base>_<chromosome>.ttl" file per chromosome.
      #
      # client       - object whose +eval+ runs R code (see the calls below).
      # var          - name of the R variable to export.
      # outfile_base - path prefix for the output files.
      # options      - passed through to the generator helpers;
      #                :chromosomes optionally lists the chromosome names to
      #                export (defaults to "1".."19" plus "X").
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        code_list = codes(client,var,options)

        #write structure
        File.open(outfile_base+'_structure.ttl','w'){|f| f.write structure(client,var,options)}

        n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
        chromosome_list = options[:chromosomes] || ((1..19).map(&:to_s) + ["X"])
        chromosome_list.each{|chrom|
          chrom = chrom.to_s

          #get number of markers per individual
          entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

          #get genotype data for this chromosome
          geno_chr = client.eval("#{var}$geno$'#{chrom}'")

          #write observations; the file is opened once per chromosome
          File.open(outfile_base+"_#{chrom}.ttl",'w'){|f|
            f.write prefixes(var,options)
            n_individuals.times{|indi|
              obs_data = observation_data(client,var,chrom,indi,geno_chr,entries_per_individual,options)
              labels = labels_for(obs_data,chrom,indi)
              observations(meas,dim,code_list,obs_data,labels,var,options).each{|obs| f.write obs}
              puts "(#{chrom}) #{indi}/#{n_individuals}"
            }
          }
        }

        #generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
      end

      # Builds the structural part of the output (prefixes, data structure
      # definition, dataset, component specifications, measure properties).
      #
      # Returns a String.
      def structure(client,var,options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)

        str = prefixes(var,options)
        str << data_structure_definition(meas,var,options)
        str << dataset(var,options)
        component_specifications(meas, dim, var, options).each{ |c| str << c }
        measure_properties(meas,var,options).each{|m| str << m}

        str
      end

      # Measure names: the phenotype names (restricted to
      # options[:measures] when given) plus the fixed genotype measures.
      def measures(client, var, options={})
        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        selected = options[:measures] ? (pheno_names & options[:measures]) : pheno_names
        selected | ["genotype","markerpos","marker"]
        # measure_properties(measures,var,options)
      end

      # No dimensions are emitted for this reader.
      def dimensions(client, var, options={})
        # dimension_properties([""],var)
        []
      end

      # No coded properties are emitted for this reader.
      def codes(client, var, options={})
        []
      end

      # Observation labels "<chr>_<n>" for one individual, numbered
      # consecutively across individuals; the count per individual is the
      # length of the first column in +data+.
      def labels_for(data,chr,individual,options={})
        per_individual = data.first.last.size
        first_label = (per_individual * individual) + 1
        last_label = per_individual * (individual + 1)
        (first_label..last_label).map{|n| "#{chr}_#{n}"}
      end

      # Collects the observation columns for one individual on one
      # chromosome.
      #
      # Returns a Hash of column name => flat Array. "chr" is inserted first
      # because #labels_for sizes its labels from the first column.
      def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
        # fetched once up front rather than once per phenotype column
        pheno_names = client.eval("names(#{var}$pheno)").to_ruby
        pheno = client.eval("#{var}$pheno").payload

        data = {}
        data["chr"] = []
        data["genotype"] = []
        data["individual"] = []
        data["marker"] = []
        data["markerpos"] = []
        pheno_names.each{|name| data[name] = [] }

        data["individual"] << Array.new(entries_per_individual, row_individ)
        pheno_names.each{|name|
          data[name] << Array.new(entries_per_individual, pheno[name].to_ruby[row_individ])
        }

        num_markers = geno_chr.payload.first.to_ruby.column_size
        data["chr"] << Array.new(num_markers, chr)
        data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
        data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
        data["markerpos"] << geno_chr.payload["map"].to_a

        data.each_value{|column| column.flatten!}
        data
      end

      # Number of entries in the first phenotype column.
      def num_individuals(client, var, options={})
        client.eval("#{var}$pheno").payload.first.to_ruby.size
      end

    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Converts an in-memory cross object (phenotypes plus per-chromosome
    # genotype data) into DataCube generator input.
    class Cross
      include R2RDF::Dataset::DataCube

      # rexp    - the cross object; read through payload["pheno"] and
      #           payload["geno"].
      # var     - dataset name passed to +generate+.
      # options - passed to +generate+.
      def generate_n3(rexp, var, options={})
        @rexp = rexp
        @options = options
        generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
      end

      def dimensions
        ["individual","chr","sex","marker"]
      end

      # Every dimension is also a coded property.
      def codes
        ["individual","chr","sex","marker"]
      end

      # All phenotype names except "sex", plus the genotype measures.
      def measures
        ((@rexp.payload["pheno"].payload.names - ["sex"]) | ["genotype","markerpos"])
      end

      # One numeric label per observation, numbered from 1.
      def observation_labels
        (1..(geno_entries_per_individual * pheno_individual_count)).to_a
      end

      # Collects the observation columns for every individual.
      #
      # Returns a Hash of column name => flat Array.
      def observation_data
        pheno = @rexp.payload["pheno"].payload
        geno = @rexp.payload["geno"].payload
        entries_per_individual = geno_entries_per_individual

        data = {}
        data["chr"] = []
        data["genotype"] = []
        data["individual"] = []
        data["marker"] = []
        data["markerpos"] = []
        pheno.names.each{|name| data[name] = [] }

        pheno_individual_count.times{|row_individ|
          data["individual"] << Array.new(entries_per_individual, row_individ)
          pheno.names.each{|name|
            data[name] << Array.new(entries_per_individual, pheno[name].to_ruby[row_individ])
          }
          geno.names.each{|chr|
            geno_chr = geno[chr]
            num_markers = geno_chr.payload.first.to_ruby.column_size
            data["chr"] << Array.new(num_markers, chr)
            data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
            data["marker"] << geno_chr.payload["map"].to_ruby.names
            data["markerpos"] << geno_chr.payload["map"].to_a
          }
        }
        data.each_value{|column| column.flatten!}

        #data["refRow"] = observation_labels()
        data
      end

      private

      # Sum of the "map" sizes over all entries of the geno payload.
      def geno_entries_per_individual
        @rexp.payload["geno"].payload.to_ruby.inject(0){|total, v| total + v["map"].size}
      end

      # Number of entries in the first phenotype column.
      def pheno_individual_count
        @rexp.payload["pheno"].payload.first.to_ruby.size
      end
    end
  end
end
|