bio-publisci 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +13 -0
  4. data/Gemfile +24 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +47 -0
  7. data/README.rdoc +48 -0
  8. data/Rakefile +70 -0
  9. data/bin/bio-publisci +83 -0
  10. data/features/create_generator.feature +25 -0
  11. data/features/integration.feature +12 -0
  12. data/features/integration_steps.rb +10 -0
  13. data/features/orm.feature +60 -0
  14. data/features/orm_steps.rb +74 -0
  15. data/features/reader.feature +25 -0
  16. data/features/reader_steps.rb +60 -0
  17. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  18. data/features/store.feature +27 -0
  19. data/features/store_steps.rb +42 -0
  20. data/features/support/env.rb +13 -0
  21. data/features/writer.feature +9 -0
  22. data/features/writer_steps.rb +17 -0
  23. data/lib/bio-publisci/analyzer.rb +57 -0
  24. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
  25. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  26. data/lib/bio-publisci/dataset/data_cube.rb +308 -0
  27. data/lib/bio-publisci/dataset/interactive.rb +57 -0
  28. data/lib/bio-publisci/loader.rb +36 -0
  29. data/lib/bio-publisci/metadata/metadata.rb +105 -0
  30. data/lib/bio-publisci/parser.rb +64 -0
  31. data/lib/bio-publisci/query/query_helper.rb +114 -0
  32. data/lib/bio-publisci/r_client.rb +54 -0
  33. data/lib/bio-publisci/readers/arff.rb +87 -0
  34. data/lib/bio-publisci/readers/big_cross.rb +119 -0
  35. data/lib/bio-publisci/readers/cross.rb +72 -0
  36. data/lib/bio-publisci/readers/csv.rb +54 -0
  37. data/lib/bio-publisci/readers/dataframe.rb +66 -0
  38. data/lib/bio-publisci/readers/r_matrix.rb +152 -0
  39. data/lib/bio-publisci/store.rb +56 -0
  40. data/lib/bio-publisci/writers/arff.rb +66 -0
  41. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  42. data/lib/bio-publisci.rb +36 -0
  43. data/lib/r2rdf.rb +226 -0
  44. data/lib/template_bak/publisci.rb +3 -0
  45. data/lib/template_bak.rb +12 -0
  46. data/lib/vocabs/cc.rb +18 -0
  47. data/lib/vocabs/cert.rb +13 -0
  48. data/lib/vocabs/dc.rb +63 -0
  49. data/lib/vocabs/dc11.rb +23 -0
  50. data/lib/vocabs/doap.rb +45 -0
  51. data/lib/vocabs/exif.rb +168 -0
  52. data/lib/vocabs/foaf.rb +69 -0
  53. data/lib/vocabs/geo.rb +13 -0
  54. data/lib/vocabs/http.rb +26 -0
  55. data/lib/vocabs/ma.rb +78 -0
  56. data/lib/vocabs/owl.rb +59 -0
  57. data/lib/vocabs/rdfs.rb +17 -0
  58. data/lib/vocabs/rsa.rb +12 -0
  59. data/lib/vocabs/rss.rb +14 -0
  60. data/lib/vocabs/sioc.rb +93 -0
  61. data/lib/vocabs/skos.rb +36 -0
  62. data/lib/vocabs/wot.rb +21 -0
  63. data/lib/vocabs/xhtml.rb +9 -0
  64. data/lib/vocabs/xsd.rb +58 -0
  65. data/resources/queries/codes.rq +13 -0
  66. data/resources/queries/dataset.rq +7 -0
  67. data/resources/queries/dimension_ranges.rq +8 -0
  68. data/resources/queries/dimensions.rq +7 -0
  69. data/resources/queries/measures.rq +7 -0
  70. data/resources/queries/observations.rq +12 -0
  71. data/resources/queries/test.rq +3 -0
  72. data/resources/weather.numeric.arff +23 -0
  73. data/spec/analyzer_spec.rb +36 -0
  74. data/spec/bio-publisci_spec.rb +7 -0
  75. data/spec/csv/bacon.csv +4 -0
  76. data/spec/csv/moar_bacon.csv +11 -0
  77. data/spec/data_cube_spec.rb +166 -0
  78. data/spec/generators/csv_spec.rb +44 -0
  79. data/spec/generators/dataframe_spec.rb +44 -0
  80. data/spec/generators/r_matrix_spec.rb +35 -0
  81. data/spec/queries/integrity/1.rq +21 -0
  82. data/spec/queries/integrity/11.rq +29 -0
  83. data/spec/queries/integrity/12.rq +37 -0
  84. data/spec/queries/integrity/14.rq +25 -0
  85. data/spec/queries/integrity/19_1.rq +21 -0
  86. data/spec/queries/integrity/19_2.rq +15 -0
  87. data/spec/queries/integrity/2.rq +22 -0
  88. data/spec/queries/integrity/3.rq +19 -0
  89. data/spec/queries/integrity/4.rq +13 -0
  90. data/spec/queries/integrity/5.rq +14 -0
  91. data/spec/r_builder_spec.rb +33 -0
  92. data/spec/spec_helper.rb +17 -0
  93. data/spec/turtle/bacon +149 -0
  94. data/spec/turtle/reference +2066 -0
  95. metadata +259 -0
@@ -0,0 +1,36 @@
1
+ # This is temporary, just to help w/ development so I don't have to rewrite r2rdf.rb to be
2
+ # a standard gem base yet. Also loads the files instead of requiring them, for easy reloading
3
+ require 'tempfile'
4
+ require 'rdf'
5
+ require 'csv'
6
+ require 'rserve'
7
+ require 'sparql'
8
+ require 'sparql/client'
9
+ require 'rdf/turtle'
10
+
11
# Loads every Ruby source file found directly inside +folder+.
#
# +folder+ is resolved relative to the directory containing this file. Only
# regular files whose name ends in ".rb" are loaded, so sub-directories and
# stray non-Ruby entries no longer make `load` raise. Files are loaded in
# sorted order so the result does not depend on how the filesystem lists the
# directory. `load` is used instead of `require` so that calling this again
# re-reads the files.
#
# Raises Errno::ENOENT (from Dir.entries) when the folder does not exist.
# Returns the list of paths that were loaded.
def load_folder(folder)
  directory = File.join(File.dirname(__FILE__), folder)
  sources = Dir.entries(directory).sort.select do |entry|
    entry.end_with?('.rb') && File.file?(File.join(directory, entry))
  end
  sources.map { |entry| File.join(directory, entry) }.each { |path| load path }
end
18
+
19
# Individual files are loaded first, in the order listed, then whole folders.
# `load` is used so that re-loading this file re-reads every source.
core_files = %w[
  dataset/interactive
  query/query_helper
  parser
  r_client
  analyzer
  store
  dataset/data_cube
]
core_files.each { |name| load File.dirname(__FILE__) + "/#{name}.rb" }

%w[metadata readers writers dataset/ORM].each { |folder| load_folder(folder) }
32
+ # Dir.foreach(File.dirname(__FILE__) + '/generators') do |file|
33
+ # unless file == "." or file == ".."
34
+ # load File.dirname(__FILE__) + '/generators/' + file
35
+ # end
36
+ # end
@@ -0,0 +1,105 @@
1
class String
  # Returns a copy of the string in which the whitespace run found at the very
  # start of the string has been removed from the beginning of every line that
  # starts with it. Used to write indented heredocs whose content should start
  # at column zero.
  def unindent
    leading = self[/\A\s*/]
    gsub(/^#{leading}/, '')
  end
end
6
+
7
+ module R2RDF
8
+ module Metadata
9
# Option values used when the caller does not supply them.
def defaults
  settings = {}
  settings[:encode_nulls] = false
  settings[:base_url] = "http://www.rqtl.org"
  settings
end
15
+
16
# Builds the Turtle description of a dataset from a hash of metadata fields.
#
# fields  - Hash read with the keys :var, :title, :creator, :description,
#           :date, and optionally :subject (list of strings written verbatim)
#           and :publishers (list of hashes with :uri and :label).
# options - Hash merged over #defaults (not otherwise used here).
#
# The caller's +fields+ hash is left untouched; the sanitized dataset name is
# kept in a local instead of being written back into it. The dataset statement
# is always closed with '.', including when no publishers are given.
#
# Raises RuntimeError when a publisher lacks :uri or :label.
# Returns the dataset statement followed by one statement per publisher.
def basic(fields, options={} )
  #TODO don't assume base dataset is "ns:dataset-var",
  #make it just "var", and try to make that clear to calling classes

  var = sanitize([fields[:var]]).first
  options = defaults().merge(options)

  str = [
    %(ns:dataset-#{var} rdfs:label "#{fields[:title]}";),
    %(dct:title "#{fields[:title]}";),
    %(dct:creator "#{fields[:creator]}";),
    %(rdfs:comment "#{fields[:description]}";),
    %(dct:description "#{fields[:description]}";),
    %(dct:issued "#{fields[:date]}"^^xsd:date;)
  ].join("\n") + "\n"

  end_str = String.new

  subjects = fields[:subject]
  if subjects && subjects.size > 0
    str << "\tdct:subject \n"
    subjects.each { |subject| str << "\t\t" + subject + ",\n" }
    # turn the comma after the last subject into a predicate separator
    str[-2] = ";"
  end

  if fields[:publishers]
    fields[:publishers].each do |publisher|
      raise "No URI for publisher #{publisher}" unless publisher[:uri]
      raise "No label for publisher #{publisher}" unless publisher[:label]
      str << "\tdct:publisher <#{publisher[:uri]}> ;\n"
      end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n\trdfs:label \"#{publisher[:label]}\" .\n\n"
    end
  end

  # Every path above leaves ";\n" at the end of str, so the character before
  # the final newline is the separator that has to become the terminator.
  str[-2] = '.'

  str + "\n" + end_str
end
51
+
52
# Builds Turtle provenance statements for a dataset.
#
# fields  - Hash read with :var (dataset name) and optionally :software
#           (a single name or a list of names).
# options - Hash merged over #defaults; :base_url is used to mint URIs.
#
# Fixes relative to the earlier implementation, which built two strings and
# then returned neither of them:
# * the text is actually returned;
# * options are merged with #defaults so :base_url is never nil;
# * "prov:wasGeneratredBy" is spelled prov:wasGeneratedBy and the statement is
#   terminated;
# * the software entity that is declared is the same URI the dataset is said
#   to be derived from;
# * the dataset is named ns:dataset-<var>, matching #basic.
#
# Returns a String of Turtle statements.
def provenance(fields, options={})
  #TODO: should either add a prefixes method or replace some with full URIs
  options = defaults().merge(options)
  var = sanitize([fields[:var]]).first
  base_url = options[:base_url]
  source_software = fields[:software] # software name, object type, optionally steps list for, eg, R

  str = "ns:dataset-#{var} a prov:Entity .\n"
  endstr = "ns:dataset-#{var} prov:wasGeneratedBy <#{base_url}/ns/R2RDF> .\n" #replace once gem has an actual name

  if source_software
    source_software = [source_software] unless source_software.respond_to? :map
    source_software.each do |soft|
      entity = "<#{base_url}/ns/prov/software/#{soft}>"
      str << "#{entity} a prov:Entity .\n"

      #Note: probably should say derived from the software object, then software object from software.
      endstr << "ns:dataset-#{var} prov:wasDerivedFrom #{entity} .\n"
    end
  end

  str + endstr
end
69
+
70
# Turtle description of the R2RDF toolkit as a foaf:Agent.
#
# options - Hash merged over #defaults; :base_url is used to mint the URI.
#
# The earlier body read `str <<-EOF`, which is a call to an undefined `str`
# method, and referenced an undefined `options`; it could only raise. The
# statement is now returned, and terminated with '.'.
#
# Returns a String.
def r2rdf_metadata(options={})
  options = defaults().merge(options)
  [
    "<#{options[:base_url]}/ns/R2RDF> a foaf:Agent;",
    'foaf:name "R2RDF Semantic Web Toolkit";',
    'org:memberOf <http://sciruby.com/> .'
  ].join("\n") + "\n"
end
77
+
78
# Turtle description of the SciRuby organization.
#
# The earlier body read `str <<-EOF`, which is a call to an undefined `str`
# method and therefore raised NoMethodError; the statement is now returned.
#
# Returns a String.
def org_metadata
  [
    '<http://sciruby.com/> a org:Organization;',
    'skos:prefLabel "SciRuby";',
    'rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".'
  ].join("\n") + "\n"
end
85
+
86
# Prints help about the metadata fields to standard output.
#
# topic - optional field name; per-topic help is not written yet, so a
#         placeholder message is printed when it is given.
#
# The field table now matches what #basic reads: the issue date is passed
# under the key `date` (it was listed as `issued`), `title` is included, and
# two typos in the descriptions are corrected.
#
# Returns nil.
def metadata_help(topic=nil)
  if topic
    puts "This should display help information for #{topic}, but there's none here yet :("
  else
    puts [
      "Available metadata fields:",
      "(Field)      (Ontology)    (Description)",
      "",
      "publishers   dct/foaf/org  The Organization/s responsible for publishing the dataset",
      "subject      dct           The subject of this dataset. Use resources when possible",
      "var          dct           The name of the dataset resource (used internally)",
      "title        dct/rdfs      The title of the dataset",
      "creator      dct           The person or process responsible for creating the dataset",
      "description  dct/rdfs      A description of the dataset",
      "date         dct           The date of issuance for the dataset (written as dct:issued)",
      ""
    ]
  end
end
104
+ end
105
+ end
@@ -0,0 +1,64 @@
1
+ module R2RDF
2
+ module Parser
3
# Parses a Turtle document held in a String into an RDF::Graph.
#
# The text is written to a temporary file because the graph is loaded from a
# path. The temporary file is removed in an `ensure` so it no longer lingers
# when writing or parsing raises.
#
# string - Turtle source text.
#
# Returns whatever RDF::Graph.load returns for the temporary file.
def create_graph(string)
  f = Tempfile.new('graph')
  begin
    f.write(string)
    f.close
    RDF::Graph.load(f.path, :format => :ttl)
  ensure
    f.close unless f.closed?
    f.unlink
  end
end
11
+
12
# Converts query results into an array of arrays of values.
#
# query_results - enumerable of solutions; each solution must respond to to_a
#                 and yield entries whose last element is the bound value.
# method        - name of the conversion to apply to each value; values that
#                 do not respond to it are converted with to_s instead.
#
# Returns one inner array per solution.
def get_ary(query_results, method='to_s')
  query_results.map do |solution|
    solution.to_a.map do |entry|
      value = entry.last
      converter = value.respond_to?(method) ? method : :to_s
      value.send(converter)
    end
  end
end
23
+
24
# Converts query results into an array of hashes, one per solution.
#
# query_results - enumerable of solutions; each solution must enumerate
#                 two-element entries (name, value).
# method        - optional conversion applied to every value that responds to
#                 it; other values are stored unchanged.
#
# Returns an Array of Hashes keyed by the first element of each entry.
def get_hashes(query_results, method=nil)
  query_results.each_with_object([]) do |solution, rows|
    rows << solution.each_with_object({}) do |element, row|
      value = element[1]
      value = value.send(method) if method && value.respond_to?(method)
      row[element[0]] = value
    end
  end
end
39
+
40
# Groups (observation, property, value) solutions by observation.
#
# query_results - enumerable of solutions indexable with :observation,
#                 :property and :value.
# shorten_uris  - when true, every key and value is passed through strip_uri.
# method        - accepted but not used by this method.
#
# Returns a Hash of observation => { property => value }, all as Strings.
def observation_hash(query_results, shorten_uris=false, method='to_s')
  by_observation = {}
  query_results.each do |sol|
    subject = sol[:observation].to_s
    by_observation[subject] ||= {}
    by_observation[subject][sol[:property].to_s] = sol[:value].to_s
  end
  return by_observation unless shorten_uris

  by_observation.each_with_object({}) do |(observation, properties), shortened|
    shortened[strip_uri(observation)] ||= {}
    properties.each do |property, value|
      shortened[strip_uri(observation)][strip_uri(property)] = strip_uri(value)
    end
  end
end
59
+
60
# Returns the last component of a URI: the text after the final '/' and, within
# that, after the final '#'.
#
# Input with no such component (nil, "", "/", or a URI ending in "/#") yields
# an empty String; previously nil, "" and "/" raised NoMethodError on nil and
# a trailing "/#" returned nil.
def strip_uri(uri)
  last_segment = uri.to_s.split('/').last.to_s
  last_segment.split('#').last.to_s
end
63
+ end
64
+ end
@@ -0,0 +1,114 @@
1
module RDF
  class Query
    class Solutions
      # Returns the solutions as an Array of Hashes, one Hash per solution,
      # keyed by the first element of each entry the solution enumerates.
      # Note that, despite the name, the result is an Array.
      def to_h
        each_with_object([]) do |solution, rows|
          rows << solution.each_with_object({}) do |element, row|
            row[element[0]] = element[1]
          end
        end
      end
    end
  end
end
18
+
19
+ module R2RDF
20
+ #.gsub(/^\s+/,'')
21
+ module Query
22
# Vocabularies for the namespaces used by the generated data cubes, keyed by
# their usual prefix.
#
# The base namespace is given as a bare URI (it used to carry literal angle
# brackets, unlike every other entry), and the code-list namespace ends in '/'
# so that terms appended to it land under .../dc/cs/ as in the prefix list
# kept in the comments of this module.
def vocabulary
  {
    base: RDF::Vocabulary.new('http://www.rqtl.org/ns/#'),
    qb: RDF::Vocabulary.new("http://purl.org/linked-data/cube#"),
    rdf: RDF::Vocabulary.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    rdfs: RDF::Vocabulary.new('http://www.w3.org/2000/01/rdf-schema#'),
    prop: RDF::Vocabulary.new('http://www.rqtl.org/dc/properties/'),
    cs: RDF::Vocabulary.new('http://www.rqtl.org/dc/cs/')
  }
end
32
+
33
+
34
+
35
+ # def execute_internal(query,repo)
36
+ # SPARQL.execute(query,repo)
37
+ # end
38
+
39
# Runs a SPARQL query against a store.
#
# string - the SPARQL query text.
# store  - an RDF::Graph or RDF::Repository (queried directly), an
#          R2RDF::Store (its url + "/sparql/" is used), or a base URL String
#          when type is :fourstore.
# type   - :graph forces +store+ to be handed to the client as is;
#          :fourstore (default) treats a String store as an endpoint base URL.
#
# Raises ArgumentError when the store/type combination is not one of the
# above; this used to surface as a NoMethodError on nil.
# Returns the result of SPARQL::Client#query.
def execute(string,store,type=:fourstore)
  sparql =
    if type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
      SPARQL::Client.new(store)
    elsif store.is_a? R2RDF::Store
      SPARQL::Client.new(store.url+"/sparql/")
    elsif type == :fourstore
      SPARQL::Client.new(store+"/sparql/")
    else
      raise ArgumentError, "don't know how to query a #{store.class} store with type #{type.inspect}"
    end
  sparql.query(string)
end
49
+
50
# Reads a SPARQL query from a file and runs it with #execute.
#
# file  - a path that exists as given, or the name of a query under the
#         resources/queries directory three levels above this file, with or
#         without its ".rq" extension. Candidates are tried in that order.
# store - passed through to #execute.
# type  - passed through to #execute.
#
# Raises RuntimeError when none of the candidates exists.
def execute_from_file(file,store,type=:fourstore)
  bundled_dir = File.dirname(__FILE__) + '/../../../resources/queries/'
  path =
    if File.exist?(file)
      file
    elsif File.exist?(bundled_dir + file)
      bundled_dir + file
    elsif File.exist?(bundled_dir + file + '.rq')
      bundled_dir + file + '.rq'
    end
  raise "couldn't find query for #{file}" unless path

  execute(IO.read(path), store, type)
end
62
+
63
+ # def prefixes
64
+ # <<-EOF
65
+ # PREFIX ns: <http://www.rqtl.org/ns/#>
66
+ # PREFIX qb: <http://purl.org/linked-data/cube#>
67
+ # PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
68
+ # PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
69
+ # PREFIX prop: <http://www.rqtl.org/dc/properties/>
70
+ # PREFIX cs: <http://www.rqtl.org/dc/cs/>
71
+ # PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
72
+
73
+ # EOF
74
+ # end
75
+
76
# Builds a SPARQL query selecting every value of one property across the
# observations of a dataset.
#
# var      - dataset name, used as ns:dataset-<var>.
# property - property name, used as prop:<property>.
#
# The query text is appended to the String returned by `prefixes`, which is
# not defined in this module and must be supplied by the including object.
# Returns that String.
def property_values(var, property)
  query = prefixes
  query << "SELECT ?val WHERE {\n"
  query << "?obs qb:dataSet ns:dataset-#{var} ;\n"
  query << "prop:#{property} ?val ;\n"
  query << "}\n"
  query
end
86
+
87
# Builds a SPARQL query selecting the skos:prefLabel of every row referenced
# by the observations of a dataset.
#
# var - dataset name, used as ns:dataset-<var>.
#
# The query text is appended to the String returned by `prefixes`, which is
# not defined in this module and must be supplied by the including object.
# Returns that String.
def row_names(var)
  query = prefixes
  [
    "SELECT ?label WHERE {",
    "?obs qb:dataSet ns:dataset-#{var} ;",
    "prop:refRow ?row .",
    "?row skos:prefLabel ?label .",
    "}"
  ].each { |line| query << line << "\n" }
  query
end
97
+
98
+ # Currently will say "___ Component", needs further parsing
99
# Builds a SPARQL query selecting the distinct rdfs:label of every component
# attached to a dataset's structure definition (ns:dsd-<var>).
#
# var - dataset name.
#
# The query text is appended to the String returned by `prefixes`, which is
# not defined in this module and must be supplied by the including object.
# Returns that String.
def property_names(var)
  query = prefixes
  [
    "SELECT DISTINCT ?label WHERE {",
    "ns:dsd-#{var} qb:component ?c .",
    "?c rdfs:label ?label",
    "}"
  ].each { |line| query << line << "\n" }
  query
end
108
+
109
+ end
110
+
111
# Plain class that mixes in R2RDF::Query, so the query helpers can be called
# on an instance.
class QueryHelper
  include R2RDF::Query
end
114
+ end
@@ -0,0 +1,54 @@
1
+ module R2RDF
2
+ module Rconnect
3
+
4
# Opens an Rserve connection.
#
# address - optional argument forwarded to Rserve::Connection.new; when it is
#           nil or false the connection is created with no arguments.
#
# Returns the new Rserve::Connection.
def connect(address=nil)
  return Rserve::Connection.new unless address

  Rserve::Connection.new(address)
end
11
+
12
# Loads a saved R workspace into the session behind +connection+.
#
# connection - object responding to eval(String).
# loc        - directory holding the workspace (defaults to the home dir).
# file       - workspace file name.
#
# The path is embedded in an R string literal, so backslashes and double
# quotes in it are escaped; unescaped, a Windows-style path or a quote in a
# directory name produced invalid R code.
#
# Returns the result of connection.eval.
def load_workspace(connection,loc=Dir.home,file=".RData")
  path = File.join(loc,file)
  escaped = path.gsub(/["\\]/) { |char| "\\#{char}" }
  connection.eval "load(\"#{escaped}\")"
end
16
+
17
# Evaluates +instruction+ on +connection+ and returns the result.
def get(connection, instruction)
  connection.eval(instruction)
end
20
+
21
# Evaluates R's ls() on +connection+ and returns the result.
def get_vars(connection)
  listing_command = "ls()"
  connection.eval(listing_command)
end
24
+
25
+ end
26
+
27
# Holds one R connection (exposed through the reader `R`) together with the
# directory its workspace file lives in.
class Client
  include R2RDF::Rconnect
  attr :R

  # auto - when true, load the workspace and print the session's variables.
  # loc  - directory containing the ".RData" workspace file.
  def initialize(auto=true, loc=Dir.home)
    @R = connect
    @loc = loc
    return unless auto

    load_ws
    puts "vars: #{vars.payload}"
  end

  # Loads the workspace found in the configured directory.
  def load_ws
    load_workspace(@R, @loc)
  end

  # Evaluates +var+ on the connection and returns the result.
  def get_var(var)
    get(@R, var)
  end

  # Path of the workspace file inside the configured directory.
  def get_ws
    @loc.to_s + "/.RData"
  end

  # Result of listing the variables defined in the R session.
  def vars
    get_vars(@R)
  end
end
54
+ end
@@ -0,0 +1,87 @@
1
+ module R2RDF
2
+ module Reader
3
+ class ARFF
4
+ include R2RDF::Dataset::DataCube
5
# Converts an ARFF document into cube output via #generate.
#
# arff    - ARFF text, or the path of a file containing it (read when the
#           path exists).
# options - Hash of generation options; :no_labels is always forced to true.
#
# The caller's options hash is no longer modified: the forced :no_labels flag
# is set on a merged copy, which is also what gets stored in @options.
#
# Attributes without a code list are passed as measures; attributes with one
# are passed as both dimensions and codes. Observation labels are 1..n, where
# n is the number of values read for the first attribute.
#
# Returns the result of #generate.
def generate_n3(arff, options={})
  arff = IO.read(arff) if File.exist? arff
  options = options.merge(no_labels: true)
  @options = options

  comps = components(arff)
  obs = data(arff, comps.keys)
  coded, uncoded = comps.keys.partition { |name| comps[name][:codes] }
  labels = (1..obs.first[1].size).to_a

  generate(uncoded, coded, coded.dup, obs, labels, relation(arff), options)
end
14
+
15
# Returns the relation name declared in an ARFF document: the last
# whitespace-separated token on the first line containing "@relation"
# (case-insensitive). Quoted names containing whitespace are not supported.
#
# Raises ArgumentError when the document has no @relation declaration; this
# used to surface as a NoMethodError on nil.
def relation(arff)
  declaration = arff[/@relation.+/i]
  raise ArgumentError, "ARFF input has no @relation declaration" unless declaration

  declaration.split.last
end
18
+
19
# Extracts the attribute declarations of an ARFF document.
#
# Returns a Hash keyed by attribute name, in declaration order. An attribute
# declared with a {a, b, c} value list maps to {type: :coded, codes: [...]};
# any other attribute maps to {type: <third token of its line>}.
#
# Codes are trimmed, so "{sunny, overcast}" yields "overcast" rather than
# " overcast" and matches the values found in the data section.
def components(arff)
  #still needs support for quoted strings with whitespace
  arff.split("\n").grep(/^@ATTRIBUTE/i).each_with_object({}) do |line, attrs|
    tokens = line.split
    name = tokens[1]
    code_list = line[/\{(.*)\}/, 1]
    attrs[name] =
      if code_list
        {type: :coded, codes: code_list.split(',').map(&:strip)}
      else
        {type: tokens[2]}
      end
  end
end
36
+
37
# Reads the data section of an ARFF document into columns.
#
# arff       - ARFF text.
# attributes - attribute names, in the order their values appear on each row.
#
# Rows are the lines after the first line starting with "@data"
# (case-insensitive). Blank lines and '%' comment lines are skipped instead of
# becoming rows of nil, and surrounding whitespace (including a trailing
# carriage return) is trimmed from every value.
#
# Raises ArgumentError when the document has no @data line.
# Returns a Hash of attribute name => Array of String values.
def data(arff, attributes)
  lines = arff.split("\n")
  marker = lines.index { |line| line =~ /^@DATA/i }
  raise ArgumentError, "ARFF input has no @data section" unless marker

  columns = attributes.each_with_object({}) { |attrib, cols| cols[attrib] = [] }
  lines[(marker + 1)..-1].each do |line|
    line = line.strip
    next if line.empty? || line.start_with?('%')

    values = line.split(',').map(&:strip)
    attributes.each_with_index { |attrib, i| columns[attrib] << values[i] }
  end
  columns
end
47
+
48
+ # def coded_dimensions
49
+ # if @options[:codes]
50
+ # @options[:codes]
51
+ # elsif @options[:row_label]
52
+ # [@options[:row_label]]
53
+ # else
54
+ # ["refRow"]
55
+ # end
56
+ # end
57
+
58
+ # def measures
59
+ # if @options[:dimensions]
60
+ # if @options[:measures]
61
+ # @options[:measures] - @options[:dimensions]
62
+ # else
63
+ # # @rexp.payload.names - @options[:dimensions]
64
+ # end
65
+ # else
66
+ # @options[:measures] # || @rexp.payload.names
67
+ # end
68
+ # end
69
+
70
+ # def observation_labels
71
+ # # row_names = @rexp.attr.payload["row.names"].to_ruby
72
+ # # row_names = (1..@rexp.payload.first.to_ruby.size).to_a unless row_names.first
73
+ # # row_names
74
+ # end
75
+
76
+ # def observation_data
77
+
78
+ # # data = {}
79
+ # # @rexp.payload.names.map{|name|
80
+ # # data[name] = @rexp.payload[name].to_ruby
81
+ # # }
82
+ # # data[@options[:row_label] || "refRow"] = observation_labels()
83
+ # # data
84
+ # end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,119 @@
1
+ module R2RDF
2
+ module Reader
3
+ class BigCross
4
+ include R2RDF::Dataset::DataCube
5
+
6
# Writes a large cross object held in an R session out as Turtle files: one
# "<outfile_base>_structure.ttl" with the cube structure, and one
# "<outfile_base>_<chromosome>.ttl" per chromosome with the observations.
#
# client       - R connection responding to eval.
# var          - name of the cross object in the R session.
# outfile_base - path prefix for the output files.
# options      - passed through to the generation helpers.
#
# The chromosome list is fixed at 1..19 plus X. Files are opened with
# File.open, so +outfile_base+ is always treated as a path, and each
# chromosome file is opened once rather than once per individual.
# Progress is printed to standard output after each individual.
def generate_n3(client, var, outfile_base, options={})
  meas = measures(client,var,options)
  dim = dimensions(client,var,options)
  codes = codes(client,var,options)

  #write structure
  File.open(outfile_base+'_structure.ttl','w'){|f| f.write structure(client,var,options)}

  n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
  chromosome_list = (1..19).map(&:to_s) + ["X"]
  # `map` is kept so the return value (one entry per chromosome) is unchanged
  chromosome_list.map do |chrom|
    File.open(outfile_base+"_#{chrom}.ttl",'w') do |out|
      out.write prefixes(var,options)

      #number of markers per individual on this chromosome
      entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

      #genotype data for this chromosome
      geno_chr = client.eval("#{var}$geno$'#{chrom}'")

      #write observations
      n_individuals.times do |indi|
        obs_data = observation_data(client,var,chrom,indi,geno_chr,entries_per_individual,options)
        labels = labels_for(obs_data,chrom,indi)
        observations(meas,dim,codes,obs_data,labels,var,options).each{|obs| out.write obs}
        puts "(#{chrom}) #{indi}/#{n_individuals}"
      end
    end
  end
end
40
+
41
# Builds the structural part of the cube for +var+: prefixes, data structure
# definition, dataset, component specifications and measure properties, in
# that order.
#
# Returns the String obtained from `prefixes` with the other parts appended.
def structure(client,var,options={})
  meas = measures(client,var,options)
  dim = dimensions(client,var,options)
  codes(client,var,options) # result is not used when building the structure

  document = prefixes(var,options)
  document << data_structure_definition(meas,var,options)
  document << dataset(var,options)
  component_specifications(meas, dim, var, options).each { |spec| document << spec }
  measure_properties(meas,var,options).each { |property| document << property }
  document
end
54
+
55
# Names of the measures for the cross object +var+: the phenotype names
# reported by R (restricted to options[:measures] when that is given),
# followed by "genotype", "markerpos" and "marker" where not already present.
def measures(client, var, options={})
  genotype_measures = ["genotype","markerpos","marker"]
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  selected = options[:measures] ? pheno_names & options[:measures] : pheno_names
  selected | genotype_measures
end
64
+
65
# This reader declares no dimensions; the arguments are accepted but unused.
def dimensions(client, var, options={})
  # dimension_properties([""],var)
  no_dimensions = []
  no_dimensions
end
69
+
70
# This reader declares no coded properties; the arguments are accepted but
# unused.
def codes(client, var, options={})
  no_codes = []
  no_codes
end
73
+
74
# Observation labels for one individual on one chromosome.
#
# data       - observation data; the size of its first value is the number of
#              labels to produce.
# chr        - chromosome name, used as the label prefix.
# individual - zero-based index of the individual; labels continue numbering
#              where the previous individual stopped.
#
# Returns Strings of the form "<chr>_<n>".
def labels_for(data,chr,individual,options={})
  per_individual = data.first.last.size
  first_number = (per_individual * individual) + 1
  Array.new(per_individual) { |offset| "#{chr}_#{first_number + offset}" }
end
79
+
80
# Collects the observation columns for one individual on one chromosome.
#
# client                 - R connection responding to eval.
# var                    - name of the cross object in the R session.
# chr                    - chromosome name.
# row_individ            - zero-based index of the individual.
# geno_chr               - R value already fetched for this chromosome.
# entries_per_individual - number of observations to emit for the individual.
#
# The phenotype names are fetched once (previously twice) and the phenotype
# table once per call (previously once per phenotype column).
#
# Returns a Hash of column name => flat Array with the keys "chr",
# "genotype", "individual", "marker", "markerpos" and one per phenotype.
def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  pheno = nil # fetched on first use, so nothing is evaluated without phenotypes

  data = {}
  %w[chr genotype individual marker markerpos].each { |key| data[key] = [] }
  pheno_names.each { |name| data[name] = [] }

  data["individual"] << Array.new(entries_per_individual, row_individ)
  pheno_names.each do |name|
    pheno ||= client.eval("#{var}$pheno").payload
    data[name] << Array.new(entries_per_individual, pheno[name].to_ruby[row_individ])
  end

  num_markers = geno_chr.payload.first.to_ruby.column_size
  data["chr"] << Array.new(num_markers, chr)
  data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
  data["markerpos"] << geno_chr.payload["map"].to_a

  # each column was built as a list of chunks; make every one a flat list
  data.each_value(&:flatten!)
  data
end
111
+
112
# Number of individuals in the cross object +var+, taken as the size of the
# first phenotype column.
def num_individuals(client, var, options={})
  pheno = client.eval("#{var}$pheno")
  first_column = pheno.payload.first
  first_column.to_ruby.size
end
115
+
116
+
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,72 @@
1
+ module R2RDF
2
+ module Reader
3
+ class Cross
4
+ include R2RDF::Dataset::DataCube
5
+
6
# Converts a cross object fetched from R into cube output via #generate.
#
# rexp    - the R value; stored in @rexp for the helper methods to read.
# var     - dataset name passed on to #generate.
# options - stored in @options and passed on to #generate.
#
# Returns the result of #generate.
def generate_n3(rexp, var, options={})
  @rexp, @options = rexp, options
  cube_parts = [measures, dimensions, codes, observation_data, observation_labels]
  generate(*cube_parts, var, options)
end
11
+
12
# Names of the properties treated as dimensions.
def dimensions
  %w[individual chr sex marker]
end
15
+
16
# Names of the properties that get a code list.
def codes
  %w[individual chr sex marker]
end
19
+
20
# Names of the measures: every phenotype name of @rexp except "sex", followed
# by "genotype" and "markerpos" where not already present.
def measures
  pheno_names = @rexp.payload["pheno"].payload.names
  (pheno_names - ["sex"]) | ["genotype","markerpos"]
end
23
+
24
# Labels for all observations: the integers 1..n, where n is the total number
# of map entries over all chromosomes of @rexp multiplied by the number of
# individuals (the size of the first phenotype column).
def observation_labels
  chromosomes = @rexp.payload["geno"].payload.to_ruby
  entries_per_individual = chromosomes.inject(0) { |total, chrom| total + chrom["map"].size }
  individuals = @rexp.payload["pheno"].payload.first.to_ruby.size
  (1..(entries_per_individual * individuals)).to_a
end
32
+
33
# Collects the observation columns for every individual and chromosome of
# @rexp.
#
# For each individual, in order: its index and each of its phenotype values
# are repeated once per map entry (summed over all chromosomes); then, for
# each chromosome, the chromosome name is repeated once per marker and the
# individual's genotype row, the marker names and the marker positions are
# appended.
#
# Returns a Hash of column name => flat Array with the keys "chr",
# "genotype", "individual", "marker", "markerpos" and one per phenotype.
def observation_data
  pheno = @rexp.payload["pheno"].payload
  geno = @rexp.payload["geno"].payload

  n_individuals = pheno.first.to_ruby.size
  entries_per_individual = geno.to_ruby.inject(0) { |total, chrom| total + chrom["map"].size }

  data = {}
  %w[chr genotype individual marker markerpos].each { |key| data[key] = [] }
  pheno.names.each { |name| data[name] = [] }

  n_individuals.times do |row_individ|
    data["individual"] << Array.new(entries_per_individual, row_individ)
    pheno.names.each do |name|
      data[name] << Array.new(entries_per_individual, pheno[name].to_ruby[row_individ])
    end

    geno.names.each do |chr|
      geno_chr = geno[chr]
      num_markers = geno_chr.payload.first.to_ruby.column_size
      data["chr"] << Array.new(num_markers, chr)
      data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
      data["marker"] << geno_chr.payload["map"].to_ruby.names
      data["markerpos"] << geno_chr.payload["map"].to_a
    end
  end

  # each column was built as a list of chunks; make every one a flat list
  data.each_key { |key| data[key].flatten! }

  #data["refRow"] = observation_labels()
  data
end
70
+ end
71
+ end
72
+ end