bio-publisci 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +13 -0
  4. data/Gemfile +24 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +47 -0
  7. data/README.rdoc +48 -0
  8. data/Rakefile +70 -0
  9. data/bin/bio-publisci +83 -0
  10. data/features/create_generator.feature +25 -0
  11. data/features/integration.feature +12 -0
  12. data/features/integration_steps.rb +10 -0
  13. data/features/orm.feature +60 -0
  14. data/features/orm_steps.rb +74 -0
  15. data/features/reader.feature +25 -0
  16. data/features/reader_steps.rb +60 -0
  17. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  18. data/features/store.feature +27 -0
  19. data/features/store_steps.rb +42 -0
  20. data/features/support/env.rb +13 -0
  21. data/features/writer.feature +9 -0
  22. data/features/writer_steps.rb +17 -0
  23. data/lib/bio-publisci/analyzer.rb +57 -0
  24. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
  25. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  26. data/lib/bio-publisci/dataset/data_cube.rb +308 -0
  27. data/lib/bio-publisci/dataset/interactive.rb +57 -0
  28. data/lib/bio-publisci/loader.rb +36 -0
  29. data/lib/bio-publisci/metadata/metadata.rb +105 -0
  30. data/lib/bio-publisci/parser.rb +64 -0
  31. data/lib/bio-publisci/query/query_helper.rb +114 -0
  32. data/lib/bio-publisci/r_client.rb +54 -0
  33. data/lib/bio-publisci/readers/arff.rb +87 -0
  34. data/lib/bio-publisci/readers/big_cross.rb +119 -0
  35. data/lib/bio-publisci/readers/cross.rb +72 -0
  36. data/lib/bio-publisci/readers/csv.rb +54 -0
  37. data/lib/bio-publisci/readers/dataframe.rb +66 -0
  38. data/lib/bio-publisci/readers/r_matrix.rb +152 -0
  39. data/lib/bio-publisci/store.rb +56 -0
  40. data/lib/bio-publisci/writers/arff.rb +66 -0
  41. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  42. data/lib/bio-publisci.rb +36 -0
  43. data/lib/r2rdf.rb +226 -0
  44. data/lib/template_bak/publisci.rb +3 -0
  45. data/lib/template_bak.rb +12 -0
  46. data/lib/vocabs/cc.rb +18 -0
  47. data/lib/vocabs/cert.rb +13 -0
  48. data/lib/vocabs/dc.rb +63 -0
  49. data/lib/vocabs/dc11.rb +23 -0
  50. data/lib/vocabs/doap.rb +45 -0
  51. data/lib/vocabs/exif.rb +168 -0
  52. data/lib/vocabs/foaf.rb +69 -0
  53. data/lib/vocabs/geo.rb +13 -0
  54. data/lib/vocabs/http.rb +26 -0
  55. data/lib/vocabs/ma.rb +78 -0
  56. data/lib/vocabs/owl.rb +59 -0
  57. data/lib/vocabs/rdfs.rb +17 -0
  58. data/lib/vocabs/rsa.rb +12 -0
  59. data/lib/vocabs/rss.rb +14 -0
  60. data/lib/vocabs/sioc.rb +93 -0
  61. data/lib/vocabs/skos.rb +36 -0
  62. data/lib/vocabs/wot.rb +21 -0
  63. data/lib/vocabs/xhtml.rb +9 -0
  64. data/lib/vocabs/xsd.rb +58 -0
  65. data/resources/queries/codes.rq +13 -0
  66. data/resources/queries/dataset.rq +7 -0
  67. data/resources/queries/dimension_ranges.rq +8 -0
  68. data/resources/queries/dimensions.rq +7 -0
  69. data/resources/queries/measures.rq +7 -0
  70. data/resources/queries/observations.rq +12 -0
  71. data/resources/queries/test.rq +3 -0
  72. data/resources/weather.numeric.arff +23 -0
  73. data/spec/analyzer_spec.rb +36 -0
  74. data/spec/bio-publisci_spec.rb +7 -0
  75. data/spec/csv/bacon.csv +4 -0
  76. data/spec/csv/moar_bacon.csv +11 -0
  77. data/spec/data_cube_spec.rb +166 -0
  78. data/spec/generators/csv_spec.rb +44 -0
  79. data/spec/generators/dataframe_spec.rb +44 -0
  80. data/spec/generators/r_matrix_spec.rb +35 -0
  81. data/spec/queries/integrity/1.rq +21 -0
  82. data/spec/queries/integrity/11.rq +29 -0
  83. data/spec/queries/integrity/12.rq +37 -0
  84. data/spec/queries/integrity/14.rq +25 -0
  85. data/spec/queries/integrity/19_1.rq +21 -0
  86. data/spec/queries/integrity/19_2.rq +15 -0
  87. data/spec/queries/integrity/2.rq +22 -0
  88. data/spec/queries/integrity/3.rq +19 -0
  89. data/spec/queries/integrity/4.rq +13 -0
  90. data/spec/queries/integrity/5.rq +14 -0
  91. data/spec/r_builder_spec.rb +33 -0
  92. data/spec/spec_helper.rb +17 -0
  93. data/spec/turtle/bacon +149 -0
  94. data/spec/turtle/reference +2066 -0
  95. metadata +259 -0
@@ -0,0 +1,36 @@
1
+ # This is temporary, just to help w/ development so I don't have to rewrite r2rdf.rb to be
2
+ # a standard gem base yet. Also loads the files instead of requiring them, for easy reloading
3
+ require 'tempfile'
4
+ require 'rdf'
5
+ require 'csv'
6
+ require 'rserve'
7
+ require 'sparql'
8
+ require 'sparql/client'
9
+ require 'rdf/turtle'
10
+
11
# Loads (rather than requires) every Ruby source file found directly inside
# +folder+, so the files can be reloaded during development.
#
# Entries are visited in sorted order so the load sequence does not depend on
# the filesystem. Subdirectories and entries without a ".rb" extension
# (hidden files, editor backups, ...) are skipped instead of being handed to
# +load+, which would raise on them.
#
# folder - folder path relative to the directory containing this file.
def load_folder(folder)
  base = File.join(File.dirname(__FILE__), folder)
  Dir.entries(base).sort.each do |entry|
    path = File.join(base, entry)
    next unless File.file?(path) && File.extname(entry) == '.rb'

    load path
  end
end
18
+
19
# Core library files, loaded one by one in this fixed order. `load` is used
# instead of `require` so the files are re-read every time this file is loaded.
%w[
  dataset/interactive
  query/query_helper
  parser
  r_client
  analyzer
  store
  dataset/data_cube
].each do |relative|
  load File.dirname(__FILE__) + "/#{relative}.rb"
end

# Folders whose contents are loaded wholesale through load_folder.
%w[metadata readers writers dataset/ORM].each { |folder| load_folder(folder) }
32
+ # Dir.foreach(File.dirname(__FILE__) + '/generators') do |file|
33
+ # unless file == "." or file == ".."
34
+ # load File.dirname(__FILE__) + '/generators/' + file
35
+ # end
36
+ # end
@@ -0,0 +1,105 @@
1
class String
  # Returns a copy of the string in which the whitespace found at the very
  # start of the string has been removed from the beginning of every line
  # that starts with it. Used to write indented heredocs.
  def unindent
    leading = self[/\A\s*/]
    gsub(/^#{leading}/, '')
  end
end
6
+
7
+ module R2RDF
8
+ module Metadata
9
# Default option values for the metadata builders.
#
# Returns a new Hash with :encode_nulls and :base_url.
def defaults
  { encode_nulls: false, base_url: "http://www.rqtl.org" }
end
15
+
16
# Builds the basic Turtle metadata statements for a dataset.
#
# fields  - Hash with :var, :title, :creator, :description and :date, plus
#           optional :subject (list of strings written verbatim as objects of
#           dct:subject) and :publishers (list of hashes with :uri and :label).
#           The hash is only read; the sanitized variable name is kept in a
#           local instead of being written back into the caller's hash.
# options - accepted for interface compatibility; not consulted here.
#
# Returns the dataset statements followed by one description per publisher.
# Raises RuntimeError when a publisher lacks :uri or :label.
#
# NOTE(review): when no :publishers key is given the dataset statement is
# left ending in ";" rather than "." -- confirm whether callers terminate it.
def basic(fields, options={} )
  #TODO don't assume base dataset is "ns:dataset-var",
  #make it just "var", and try to make that clear to calling classes

  var = sanitize([fields[:var]]).first
  str = <<-EOF.unindent
    ns:dataset-#{var} rdfs:label "#{fields[:title]}";
    dct:title "#{fields[:title]}";
    dct:creator "#{fields[:creator]}";
    rdfs:comment "#{fields[:description]}";
    dct:description "#{fields[:description]}";
    dct:issued "#{fields[:date]}"^^xsd:date;
  EOF

  end_str = ''.dup

  subjects = fields[:subject]
  if subjects && subjects.size > 0
    str << "\tdct:subject \n"
    subjects.each { |subject| str << "\t\t" + subject + ",\n" }
    # turn the trailing "," of the object list into ";"
    str[-2] = ";"
  end

  if fields[:publishers]
    fields[:publishers].each do |publisher|
      raise "No URI for publisher #{publisher}" unless publisher[:uri]
      raise "No label for publisher #{publisher}" unless publisher[:label]
      str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
      end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
    end
    # close the dataset statement: the last ";" becomes "."
    str[-2] = '.'
  end

  str + "\n" + end_str
end
51
+
52
# Builds Turtle provenance statements for a dataset.
#
# fields  - Hash with :var (dataset name) and optional :software, either a
#           single value or a collection responding to #map
#           (software name, object type, optionally steps list for, eg, R).
# options - Hash merged over #defaults; :base_url is used to build the URIs.
#
# Returns the statements as one String: the entity declarations first, then
# the wasGeneratedBy / wasDerivedFrom links.
def provenance(fields, options={})
  #TODO: should either add a prefixes method or replace some with full URIs
  options = defaults.merge(options)
  var = sanitize([fields[:var]]).first
  # same resource name the other builders and queries use (ns:dataset-<var>)
  dataset = "ns:dataset-#{var}"
  base = options[:base_url]

  str = "#{dataset} a prov:Entity.\n"
  endstr = "#{dataset} prov:wasGeneratedBy <#{base}/ns/R2RDF> .\n" #replace once gem has an actual name

  source_software = fields[:software]
  if source_software
    source_software = [source_software] unless source_software.respond_to? :map
    source_software.each do |soft|
      # declare the software entity once and refer to that same URI below
      entity = "<#{base}/ns/prov/software/#{soft}>"
      str << "#{entity} a prov:Entity .\n"

      #Note: probably should say derived from the software object, then software object from software.
      endstr << "#{dataset} prov:wasDerivedFrom #{entity} .\n"
    end
  end

  str + endstr
end
69
+
70
# Turtle description of the R2RDF toolkit agent.
#
# options - optional Hash merged over #defaults; :base_url is used to build
#           the agent URI.
#
# Returns the statement as a String, terminated with ".".
def r2rdf_metadata(options={})
  options = defaults.merge(options)
  <<-EOF.unindent
    <#{options[:base_url]}/ns/R2RDF> a foaf:Agent;
    foaf:name "R2RDF Semantic Web Toolkit";
    org:memberOf <http://sciruby.com/> .
  EOF
end
77
+
78
# Turtle description of the SciRuby organization.
#
# Returns the statement as a String.
def org_metadata
  <<-EOF.unindent
    <http://sciruby.com/> a org:Organization;
    skos:prefLabel "SciRuby";
    rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".
  EOF
end
85
+
86
# Prints help about the supported metadata fields to standard output.
#
# topic - when given, a placeholder message naming the topic is printed;
#         otherwise the table of available fields is printed.
#
# Returns nil.
def metadata_help(topic=nil)
  return puts("This should display help information for #{topic}, but there's none here yet :(") if topic

  overview = <<-EOF.unindent
    Available metadata fields:
    (Field) (Ontology) (Description)

    publishers dct/foaf/org The Organization/s responsible for publishing the dataset
    subject dct The subject of this dataset. Use resources when possible
    var dct The name of the datset resource (used internally)
    creator dct The person or process responsible for creating the dataset
    description dct/rdfs A descriptions of the dataset
    issued dct The date of issuance for the dataset

  EOF
  puts overview
end
104
+ end
105
+ end
@@ -0,0 +1,64 @@
1
+ module R2RDF
2
+ module Parser
3
# Parses a Turtle string into an RDF::Graph by writing it to a temporary
# file and loading that file.
#
# string - Turtle source text.
#
# Returns whatever RDF::Graph.load returns for the file.
# The temporary file is removed even when writing or parsing raises.
def create_graph(string)
  f = Tempfile.new('graph')
  begin
    f.write(string)
    f.close
    RDF::Graph.load(f.path, :format => :ttl)
  ensure
    # close! closes the handle if still open and unlinks the file
    f.close!
  end
end
11
+
12
# Converts query solutions into an array of arrays of values.
#
# query_results - collection of solutions; each solution's #to_a yields
#                 entries whose last element is the bound value.
# method        - name of the conversion applied to each value; values that
#                 do not respond to it are converted with to_s instead.
#
# Returns one inner array per solution.
def get_ary(query_results,method='to_s')
  query_results.map do |solution|
    solution.to_a.map do |entry|
      value = entry.last
      converter = value.respond_to?(method) ? method : :to_s
      value.send(converter)
    end
  end
end
23
+
24
# Converts query solutions into an array of hashes.
#
# query_results - collection of solutions, each enumerating [name, value]
#                 pairs.
# method        - optional conversion applied to every value that responds
#                 to it; other values (and all values when nil) are kept as
#                 they are.
#
# Returns one Hash per solution.
def get_hashes(query_results,method=nil)
  query_results.map do |solution|
    solution.each_with_object({}) do |element, row|
      value = element[1]
      row[element[0]] = method && value.respond_to?(method) ? value.send(method) : value
    end
  end
end
39
+
40
# Groups observation query solutions into a nested hash of
# observation => { property => value }, with every key and value
# converted to a String.
#
# query_results - collection of solutions indexable by :observation,
#                 :property and :value.
# shorten_uris  - when truthy, observations, properties and values are all
#                 passed through strip_uri.
# method        - unused.
#
# Returns the nested Hash.
def observation_hash(query_results,shorten_uris=false,method='to_s')
  full = query_results.each_with_object({}) do |sol, acc|
    (acc[sol[:observation].to_s] ||= {})[sol[:property].to_s] = sol[:value].to_s
  end
  return full unless shorten_uris

  full.each_with_object({}) do |(observation, properties), short|
    bucket = (short[strip_uri(observation)] ||= {})
    properties.each { |property, value| bucket[strip_uri(property)] = strip_uri(value) }
  end
end
59
+
60
# Returns the last path segment of a URI, or its fragment when that segment
# contains "#".
#
# uri - anything responding to to_s.
#
# Returns a String; empty when the input has no usable segment (nil, "",
# "/", "#") instead of raising or returning nil.
def strip_uri(uri)
  segment = uri.to_s.split('/').last.to_s
  segment.split('#').last.to_s
end
63
+ end
64
+ end
@@ -0,0 +1,114 @@
1
module RDF
  class Query
    class Solutions
      # Converts the solutions into an array of hashes, one per solution,
      # mapping each binding's first element to its second.
      #
      # Note that, despite the name, the result is an Array.
      def to_h
        map do |solution|
          solution.each_with_object({}) { |element, row| row[element[0]] = element[1] }
        end
      end
    end
  end
end
18
+
19
+ module R2RDF
20
+ #.gsub(/^\s+/,'')
21
+ module Query
22
# Vocabularies used when building queries.
#
# Namespace IRIs are given bare (no surrounding angle brackets), and the
# code-list namespace ends in "/" like the other rqtl.org namespaces so that
# appended terms form a separate path segment.
#
# Returns a Hash of Symbol => RDF::Vocabulary.
def vocabulary
  {
    base: RDF::Vocabulary.new('http://www.rqtl.org/ns/#'),
    qb:   RDF::Vocabulary.new("http://purl.org/linked-data/cube#"),
    rdf:  RDF::Vocabulary.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    rdfs: RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#'),
    prop: RDF::Vocabulary.new('http://www.rqtl.org/dc/properties/'),
    cs:   RDF::Vocabulary.new('http://www.rqtl.org/dc/cs/')
  }
end
32
+
33
+
34
+
35
+ # def execute_internal(query,repo)
36
+ # SPARQL.execute(query,repo)
37
+ # end
38
+
39
# Runs a SPARQL query against a store.
#
# string - the SPARQL query text.
# store  - an RDF::Graph / RDF::Repository (queried directly), an
#          R2RDF::Store (its url plus "/sparql/" is used), or, for type
#          :fourstore, a base URL String to which "/sparql/" is appended.
# type   - :graph forces the store to be handed to the client as is;
#          :fourstore (default) treats it as an endpoint base URL.
#
# Returns the result of SPARQL::Client#query.
# Raises ArgumentError when the store/type combination is not supported
# (previously this surfaced as a NoMethodError on nil).
def execute(string,store,type=:fourstore)
  target =
    if type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
      store
    elsif store.is_a? R2RDF::Store
      store.url+"/sparql/"
    elsif type == :fourstore
      store+"/sparql/"
    else
      raise ArgumentError, "don't know how to query a #{store.class} with store type #{type.inspect}"
    end

  SPARQL::Client.new(target).query(string)
end
49
+
50
# Reads a SPARQL query from a file and executes it.
#
# file  - a path to an existing file, or the name of a query under
#         resources/queries (looked up as given, then with ".rq" appended).
# store - passed through to #execute.
# type  - passed through to #execute.
#
# Returns the result of #execute.
# Raises RuntimeError when no matching file exists.
def execute_from_file(file,store,type=:fourstore)
  query_dir = File.dirname(__FILE__) + '/../../../resources/queries/'

  path =
    if File.exist?(file)
      file
    elsif File.exist?(query_dir + file)
      query_dir + file
    elsif File.exist?(query_dir + file + '.rq')
      query_dir + file + '.rq'
    end
  raise "couldn't find query for #{file}" unless path

  execute(IO.read(path), store, type)
end
62
+
63
+ # def prefixes
64
+ # <<-EOF
65
+ # PREFIX ns: <http://www.rqtl.org/ns/#>
66
+ # PREFIX qb: <http://purl.org/linked-data/cube#>
67
+ # PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
68
+ # PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
69
+ # PREFIX prop: <http://www.rqtl.org/dc/properties/>
70
+ # PREFIX cs: <http://www.rqtl.org/dc/cs/>
71
+ # PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
72
+
73
+ # EOF
74
+ # end
75
+
76
# Builds a SPARQL query selecting the values of one property over the
# observations of a dataset.
#
# var      - dataset name, interpolated into ns:dataset-<var>.
# property - property name, interpolated into prop:<property>.
#
# Returns the string returned by #prefixes with the query appended to it.
def property_values(var, property)
  query_lines = [
    "SELECT ?val WHERE {",
    "?obs qb:dataSet ns:dataset-#{var} ;",
    "prop:#{property} ?val ;",
    "}"
  ]
  prefixes << query_lines.map { |line| line + "\n" }.join
end
86
+
87
# Builds a SPARQL query selecting the skos:prefLabel of every row referenced
# (through prop:refRow) by the observations of a dataset.
#
# var - dataset name, interpolated into ns:dataset-<var>.
#
# Returns the string returned by #prefixes with the query appended to it.
def row_names(var)
  query_lines = [
    "SELECT ?label WHERE {",
    "?obs qb:dataSet ns:dataset-#{var} ;",
    "prop:refRow ?row .",
    "?row skos:prefLabel ?label .",
    "}"
  ]
  prefixes << query_lines.map { |line| line + "\n" }.join
end
97
+
98
+ # Currently will say "___ Component", needs further parsing
99
# Builds a SPARQL query selecting the distinct rdfs:label of every component
# attached to a dataset's structure definition (ns:dsd-<var>).
#
# var - dataset name.
#
# Returns the string returned by #prefixes with the query appended to it.
def property_names(var)
  query_lines = [
    "SELECT DISTINCT ?label WHERE {",
    "ns:dsd-#{var} qb:component ?c .",
    "?c rdfs:label ?label",
    "}"
  ]
  prefixes << query_lines.map { |line| line + "\n" }.join
end
108
+
109
+ end
110
+
111
# Plain class that mixes in the R2RDF::Query helpers, so they can be called
# on an instance.
class QueryHelper
  include(R2RDF::Query)
end
114
+ end
@@ -0,0 +1,54 @@
1
+ module R2RDF
2
# Thin helpers around an Rserve connection.
module Rconnect
  # Opens an Rserve connection; +address+ is passed to the constructor only
  # when one is given.
  def connect(address=nil)
    address ? Rserve::Connection.new(address) : Rserve::Connection.new
  end

  # Evaluates an R `load(...)` call for the workspace file +file+ inside
  # directory +loc+ (defaults: the home directory and ".RData").
  def load_workspace(connection,loc=Dir.home,file=".RData")
    workspace = File.join(loc, file)
    connection.eval("load(\"#{workspace}\")")
  end

  # Evaluates +instruction+ on the connection and returns the result.
  def get(connection, instruction)
    connection.eval(instruction)
  end

  # Evaluates R's `ls()` on the connection and returns the result.
  def get_vars(connection)
    connection.eval("ls()")
  end
end
26
+
27
# Holds an Rserve connection together with the directory of the R workspace
# it works on.
class Client
  include R2RDF::Rconnect
  attr_reader :R

  # auto - when truthy, the workspace is loaded and the R variables are
  #        printed right after connecting.
  # loc  - directory containing the ".RData" workspace file.
  def initialize(auto=true, loc=Dir.home)
    @R = connect
    @loc = loc
    return unless auto

    load_ws
    puts "vars: #{vars.payload}"
  end

  # Loads the workspace found in the configured directory.
  def load_ws
    load_workspace(@R, @loc)
  end

  # Evaluates +var+ on the connection and returns the result.
  def get_var(var)
    get(@R, var)
  end

  # Path of the workspace file.
  def get_ws
    @loc.to_s + "/.RData"
  end

  # Result of listing the variables on the connection.
  def vars
    get_vars(@R)
  end
end
54
+ end
@@ -0,0 +1,87 @@
1
+ module R2RDF
2
+ module Reader
3
+ class ARFF
4
+ include R2RDF::Dataset::DataCube
5
# Converts ARFF input into its generated representation via #generate.
#
# arff    - either the path of an existing ARFF file or the ARFF text itself.
# options - Hash of generator options. :no_labels is always forced to true;
#           the caller's hash is left untouched (a merged copy is used).
#
# Attributes declared with a code list are passed as both dimensions and
# codes, all other attributes as measures; observations are labelled
# 1..number_of_rows and the dataset is named after the @relation.
#
# Returns the result of #generate.
def generate_n3(arff, options={})
  arff = File.read(arff) if File.exist? arff
  options = options.merge(no_labels: true)
  @options = options

  comps = components(arff)
  obs = data(arff, comps.keys)
  coded, plain = comps.keys.partition { |name| comps[name][:codes] }

  # every column has one entry per data row; no attributes means no rows
  row_count = obs.empty? ? 0 : obs.first[1].size

  generate(plain, coded, coded.dup, obs, (1..row_count).to_a, relation(arff), options)
end
14
+
15
# Extracts the relation name from ARFF text: the last whitespace-separated
# token of the first "@relation ..." match (case-insensitive).
def relation(arff)
  declaration = arff[/@relation.+/i]
  declaration.split.last
end
18
+
19
# Parses the @ATTRIBUTE declarations of ARFF text.
#
# arff - the ARFF text.
#
# Returns a Hash of attribute name => description. Attributes declared with
# a {...} value list get {type: :coded, codes: [...]}, where each code has
# surrounding whitespace removed ("{sunny, overcast}" yields "overcast", not
# " overcast"); all others get {type: <declared type token>}.
def components(arff)
  #still needs support for quoted strings with whitespace
  arff.split("\n").grep(/^@ATTRIBUTE/i).each_with_object({}) do |line, comps|
    tokens = line.split
    value_list = line[/\{.*\}/]

    if value_list
      # the name may be glued to the opening brace ("windy{TRUE,FALSE}")
      name = tokens[1].to_s.sub(/\{.*\z/, '')
      codes = value_list[1..-2].split(',').map(&:strip)
      comps[name] = {type: :coded, codes: codes}
    else
      comps[tokens[1]] = {type: tokens[2]}
    end
  end
end
36
+
37
# Reads the rows following the @DATA line of ARFF text into columns.
#
# arff       - the ARFF text.
# attributes - attribute names, in column order.
#
# Blank lines and "%" comment lines are skipped, and each value has
# surrounding whitespace (including a trailing carriage return) removed.
# A row with fewer values than attributes contributes nil for the rest.
#
# Returns a Hash of attribute name => Array of values (Strings).
# Raises ArgumentError when the text has no @DATA line.
def data(arff, attributes)
  lines = arff.split("\n")
  header = lines.index { |line| line =~ /^@DATA/i }
  raise ArgumentError, "no @DATA section found in ARFF input" unless header

  columns = attributes.each_with_object({}) { |attrib, acc| acc[attrib] = [] }
  lines[(header + 1)..-1].each do |line|
    next if line.strip.empty? || line.lstrip.start_with?('%')

    values = line.split(',').map(&:strip)
    attributes.each_with_index { |attrib, i| columns[attrib] << values[i] }
  end
  columns
end
47
+
48
+ # def coded_dimensions
49
+ # if @options[:codes]
50
+ # @options[:codes]
51
+ # elsif @options[:row_label]
52
+ # [@options[:row_label]]
53
+ # else
54
+ # ["refRow"]
55
+ # end
56
+ # end
57
+
58
+ # def measures
59
+ # if @options[:dimensions]
60
+ # if @options[:measures]
61
+ # @options[:measures] - @options[:dimensions]
62
+ # else
63
+ # # @rexp.payload.names - @options[:dimensions]
64
+ # end
65
+ # else
66
+ # @options[:measures] # || @rexp.payload.names
67
+ # end
68
+ # end
69
+
70
+ # def observation_labels
71
+ # # row_names = @rexp.attr.payload["row.names"].to_ruby
72
+ # # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
73
+ # # row_names
74
+ # end
75
+
76
+ # def observation_data
77
+
78
+ # # data = {}
79
+ # # @rexp.payload.names.map{|name|
80
+ # # data[name] = @rexp.payload[name].to_ruby
81
+ # # }
82
+ # # data[@options[:row_label] || "refRow"] = observation_labels()
83
+ # # data
84
+ # end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,119 @@
1
+ module R2RDF
2
+ module Reader
3
+ class BigCross
4
+ include R2RDF::Dataset::DataCube
5
+
6
# Writes a cross object held in R out as Turtle files: one
# "<outfile_base>_structure.ttl" plus one "<outfile_base>_<chromosome>.ttl"
# per chromosome (1 to 19 and X). Progress is printed after each individual.
#
# client       - R connection used to evaluate expressions.
# var          - name of the R variable.
# outfile_base - prefix of the output file names.
# options      - passed through to the helper methods.
def generate_n3(client, var, outfile_base, options={})
  meas = measures(client, var, options)
  dim = dimensions(client, var, options)
  code_list = codes(client, var, options)

  #write structure
  open(outfile_base+'_structure.ttl', 'w') do |file|
    file.write(structure(client, var, options))
  end

  n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
  chromosomes = (1..19).map(&:to_s) + ["X"]

  chromosomes.map do |chrom|
    chrom_file = outfile_base+"_#{chrom}.ttl"

    # start the file with the prefixes; observations are appended below
    open(chrom_file, 'w') { |file| file.write(prefixes(var, options)) }
    entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

    #get genotype data for this chromosome
    geno_chr = client.eval("#{var}$geno$'#{chrom}'")

    #write observations
    n_individuals.times do |indi|
      obs_data = observation_data(client, var, chrom.to_s, indi, geno_chr, entries_per_individual, options)
      labels = labels_for(obs_data, chrom.to_s, indi)
      open(chrom_file, 'a') do |file|
        observations(meas, dim, code_list, obs_data, labels, var, options).map { |obs| file.write(obs) }
      end
      puts "(#{chrom}) #{indi}/#{n_individuals}"
    end
  end

  #generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
end
40
+
41
# Builds the structural part of the output: prefixes, data structure
# definition, dataset, component specifications and measure properties,
# concatenated in that order onto the string returned by #prefixes.
#
# Returns that String.
def structure(client,var,options={})
  meas = measures(client, var, options)
  dim = dimensions(client, var, options)
  # evaluated as in the original call sequence; the result is not used here
  codes(client, var, options)

  str = prefixes(var, options)
  str << data_structure_definition(meas, var, options)
  str << dataset(var, options)
  component_specifications(meas, dim, var, options).each { |spec| str << spec }
  measure_properties(meas, var, options).each { |prop| str << prop }

  str
end
54
+
55
# Names of the measures: the phenotype names reported by R (restricted to
# options[:measures] when that is given) plus "genotype", "markerpos" and
# "marker".
def measures(client, var, options={})
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  selected = options[:measures] ? pheno_names & options[:measures] : pheno_names
  selected | ["genotype","markerpos","marker"]
end
64
+
65
# Dimensions of the cube: always an empty list; the arguments are ignored.
def dimensions(client, var, options={})
  # dimension_properties([""],var)
  Array.new
end
69
+
70
# Coded components of the cube: always an empty list; the arguments are
# ignored.
def codes(client, var, options={})
  Array.new
end
73
+
74
# Observation labels for one individual on one chromosome.
#
# data       - observation data; the size of its first column is the number
#              of labels produced.
# chr        - chromosome name, used as label prefix.
# individual - zero-based index of the individual; labels continue the
#              numbering after the preceding individuals.
#
# Returns an Array of "<chr>_<n>" Strings.
def labels_for(data,chr,individual,options={})
  per_individual = data.first.last.size
  first_number = per_individual * individual + 1
  last_number = per_individual * (individual + 1)
  (first_number..last_number).map { |number| "#{chr}_#{number}" }
end
79
+
80
# Collects the observation columns for one individual on one chromosome.
#
# client                 - R connection used to evaluate expressions.
# var                    - name of the R variable.
# chr                    - chromosome name.
# row_individ            - zero-based index of the individual.
# geno_chr               - R result for "<var>$geno$'<chr>'".
# entries_per_individual - how many times the individual's index and
#                          phenotype values are repeated.
#
# The phenotype names and the phenotype table are each fetched from R once
# per call rather than once per phenotype.
#
# Returns a Hash of column name => flat Array with the keys "chr",
# "genotype", "individual", "marker", "markerpos" and one per phenotype.
def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  pheno = client.eval("#{var}$pheno").payload
  geno = geno_chr.payload
  repeat = lambda { |value, count| (1..count).to_a.fill(value) }

  data = {}
  %w[chr genotype individual marker markerpos].each { |key| data[key] = [] }
  pheno_names.each { |name| data[name] = [] }

  data["individual"] << repeat.call(row_individ, entries_per_individual)
  pheno_names.each do |name|
    data[name] << repeat.call(pheno[name].to_ruby[row_individ], entries_per_individual)
  end

  num_markers = geno.first.to_ruby.column_size
  data["chr"] << repeat.call(chr, num_markers)
  data["genotype"] << geno["data"].to_ruby.row(row_individ).to_a
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
  data["markerpos"] << geno["map"].to_a

  # every column was filled with nested arrays; flatten them in place
  data.each_value(&:flatten!)
  data
end
111
+
112
# Number of individuals: the size of the first phenotype column of the R
# variable.
def num_individuals(client, var, options={})
  pheno = client.eval("#{var}$pheno")
  first_column = pheno.payload.first
  first_column.to_ruby.size
end
115
+
116
+
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,72 @@
1
+ module R2RDF
2
+ module Reader
3
# Reader that turns an R cross object (already fetched as an R expression)
# into data cube output through the DataCube #generate method.
class Cross
  include R2RDF::Dataset::DataCube

  # rexp    - the R expression holding the cross ("pheno" and "geno" parts).
  # var     - dataset name passed on to #generate.
  # options - options passed on to #generate.
  def generate_n3(rexp, var, options={})
    @rexp = rexp
    @options = options
    generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
  end

  # Names of the dimension components.
  def dimensions
    %w[individual chr sex marker]
  end

  # Names of the coded components (the same list as #dimensions).
  def codes
    %w[individual chr sex marker]
  end

  # Phenotype names except "sex", plus "genotype" and "markerpos".
  def measures
    pheno_names = @rexp.payload["pheno"].payload.names
    (pheno_names - ["sex"]) | ["genotype","markerpos"]
  end

  # Labels 1..N where N is (sum of the map sizes of all chromosomes) times
  # (number of individuals).
  def observation_labels
    chromosomes = @rexp.payload["geno"].payload.to_ruby
    entries_per_individual = chromosomes.inject(0) { |total, chromosome| total + chromosome["map"].size }
    individuals = @rexp.payload["pheno"].payload.first.to_ruby.size
    (1..(entries_per_individual * individuals)).to_a
  end

  # Builds the observation columns: for every individual, its index and
  # phenotype values repeated once per marker entry, followed by the
  # chromosome name, genotype row, marker names and marker positions of
  # every chromosome.
  #
  # Returns a Hash of column name => flat Array.
  def observation_data
    pheno = @rexp.payload["pheno"].payload
    geno = @rexp.payload["geno"].payload

    n_individuals = pheno.first.to_ruby.size
    entries_per_individual = geno.to_ruby.inject(0) { |total, chromosome| total + chromosome["map"].size }

    data = {}
    %w[chr genotype individual marker markerpos].each { |key| data[key] = [] }
    pheno.names.each { |name| data[name] = [] }

    n_individuals.times do |row_individ|
      data["individual"] << (1..entries_per_individual).to_a.fill(row_individ)
      pheno.names.each do |name|
        data[name] << (1..entries_per_individual).to_a.fill(pheno[name].to_ruby[row_individ])
      end

      geno.names.each do |chr|
        geno_chr = geno[chr]
        num_markers = geno_chr.payload.first.to_ruby.column_size
        data["chr"] << (1..num_markers).to_a.fill(chr)
        data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
        data["marker"] << geno_chr.payload["map"].to_ruby.names
        data["markerpos"] << geno_chr.payload["map"].to_a
      end
    end

    # columns were filled with nested arrays; flatten each one in place
    data.each_value(&:flatten!)

    #data["refRow"] = observation_labels()
    data
  end
end
71
+ end
72
+ end