publisci 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -2
- data/LICENSE.txt +19 -17
- data/README.md +41 -8
- data/README.rdoc +3 -5
- data/Rakefile +2 -2
- data/bin/publisci +9 -7
- data/examples/visualization/prov_viz.rb +1 -1
- data/lib/publisci.rb +19 -11
- data/lib/publisci/datacube_model.rb +2 -2
- data/lib/publisci/dataset/ORM/data_cube_orm.rb +2 -2
- data/lib/publisci/dataset/data_cube.rb +1 -1
- data/lib/publisci/dataset/dataset_for.rb +6 -1
- data/lib/publisci/dataset/interactive.rb +1 -46
- data/lib/publisci/generators/base.rb +22 -0
- data/lib/publisci/generators/maf.rb +172 -0
- data/lib/publisci/metadata/generator.rb +1 -1
- data/lib/publisci/parser.rb +62 -62
- data/lib/publisci/parsers/base.rb +29 -0
- data/lib/publisci/parsers/maf.rb +20 -0
- data/lib/publisci/readers/arff.rb +43 -43
- data/lib/publisci/readers/base.rb +2 -2
- data/lib/publisci/readers/csv.rb +2 -1
- data/lib/publisci/readers/maf.rb +15 -181
- data/lib/publisci/readers/r_matrix.rb +143 -143
- data/lib/publisci/writers/arff.rb +1 -1
- data/lib/publisci/writers/base.rb +1 -1
- data/resources/maf_rdf.ttl +98 -22
- data/spec/ORM/data_cube_orm_spec.rb +1 -1
- data/spec/ORM/prov_model_spec.rb +3 -3
- data/spec/dataset_for_spec.rb +1 -1
- data/spec/generators/maf_spec.rb +2 -1
- data/spec/maf_query_spec.rb +1 -1
- metadata +25 -23
- data/lib/r2rdf.rb +0 -226
- data/lib/template_bak.rb +0 -12
data/lib/publisci/parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module PubliSci
|
2
|
-
|
2
|
+
module RDFParser
|
3
3
|
|
4
4
|
def is_uri?(obj)
|
5
5
|
RDF::Resource(obj).valid?
|
@@ -38,61 +38,61 @@ module PubliSci
|
|
38
38
|
h
|
39
39
|
end
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
41
|
+
def load_string(string,repo=RDF::Repository.new)
|
42
|
+
f = Tempfile.new('repo')
|
43
|
+
f.write(string)
|
44
|
+
f.close
|
45
|
+
repo.load(f.path, :format => :ttl)
|
46
|
+
f.unlink
|
47
|
+
repo
|
48
|
+
end
|
49
49
|
|
50
|
-
|
50
|
+
def get_ary(query_results,method='to_s')
|
51
51
|
query_results.map{|solution|
|
52
52
|
solution.to_a.map{|entry|
|
53
53
|
if entry.last.respond_to? method
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
54
|
+
entry.last.send(method)
|
55
|
+
else
|
56
|
+
entry.last.to_s
|
57
|
+
end
|
58
58
|
}
|
59
59
|
}
|
60
60
|
end
|
61
61
|
|
62
62
|
def get_hashes(query_results,method=nil)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
63
|
+
arr=[]
|
64
|
+
query_results.map{|solution|
|
65
|
+
h={}
|
66
|
+
solution.map{|element|
|
67
|
+
if method && element[1].respond_to?(method)
|
68
|
+
h[element[0]] = element[1].send(method)
|
69
|
+
else
|
70
|
+
h[element[0]] = element[1]
|
71
|
+
end
|
72
|
+
}
|
73
|
+
arr << h
|
74
|
+
}
|
75
|
+
arr
|
76
76
|
end
|
77
77
|
|
78
78
|
def observation_hash(query_results,shorten_uris=false,method='to_s')
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
79
|
+
h={}
|
80
|
+
query_results.map{|sol|
|
81
|
+
(h[sol[:observation].to_s] ||= {})[sol[:property].to_s] = sol[:value].to_s
|
82
|
+
}
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
84
|
+
if shorten_uris
|
85
|
+
newh= {}
|
86
|
+
h.map{|k,v|
|
87
|
+
newh[strip_uri(k)] ||= {}
|
88
|
+
v.map{|kk,vv|
|
89
|
+
newh[strip_uri(k)][strip_uri(kk)] = strip_uri(vv)
|
90
|
+
}
|
91
|
+
}
|
92
|
+
newh
|
93
|
+
else
|
94
|
+
h
|
95
|
+
end
|
96
96
|
end
|
97
97
|
|
98
98
|
def to_resource(obj, options={})
|
@@ -162,7 +162,7 @@ module PubliSci
|
|
162
162
|
to_resource(obj,options)
|
163
163
|
elsif obj && obj.is_a?(String) && (obj[0]=="<" && obj[-1] = ">")
|
164
164
|
obj
|
165
|
-
elsif obj.is_a?(Array)
|
165
|
+
elsif obj.is_a?(Array)
|
166
166
|
node_str = add_node(node_index,node_str)
|
167
167
|
["#{node_str}" ] + [bnode_value(obj, node_index, node_str, options)]
|
168
168
|
else
|
@@ -179,7 +179,7 @@ module PubliSci
|
|
179
179
|
if obj.size == 2
|
180
180
|
if obj[0].is_a?(String)
|
181
181
|
if is_complex?(obj[1])
|
182
|
-
str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
|
182
|
+
str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
|
183
183
|
subnodes << encode_value(obj[1], options, node_index, node_str)
|
184
184
|
else
|
185
185
|
str << "#{to_resource(obj[0])} #{encode_value(obj[1], options, node_index, node_str)} "
|
@@ -220,7 +220,7 @@ module PubliSci
|
|
220
220
|
raise "Invalid Structured value: #{obj}"
|
221
221
|
end
|
222
222
|
|
223
|
-
if subnodes.size > 0
|
223
|
+
if subnodes.size > 0
|
224
224
|
[str, subnodes.flatten].flatten
|
225
225
|
else
|
226
226
|
str
|
@@ -231,22 +231,22 @@ module PubliSci
|
|
231
231
|
tabs = 0
|
232
232
|
turtle_str.split("\n").map{|str|
|
233
233
|
case str[-1]
|
234
|
-
|
235
|
-
|
236
|
-
tabs = 0
|
237
|
-
(" " * last_tabs) + str
|
238
|
-
when ";"
|
239
|
-
last_tabs = tabs
|
240
|
-
tabs = 1 if tabs == 0
|
241
|
-
(" " * last_tabs) + str
|
242
|
-
else
|
243
|
-
last_tabs = tabs
|
244
|
-
if str.size < 2
|
234
|
+
when "."
|
235
|
+
last_tabs = tabs
|
245
236
|
tabs = 0
|
237
|
+
(" " * last_tabs) + str
|
238
|
+
when ";"
|
239
|
+
last_tabs = tabs
|
240
|
+
tabs = 1 if tabs == 0
|
241
|
+
(" " * last_tabs) + str
|
246
242
|
else
|
247
|
-
|
248
|
-
|
249
|
-
|
243
|
+
last_tabs = tabs
|
244
|
+
if str.size < 2
|
245
|
+
tabs = 0
|
246
|
+
else
|
247
|
+
tabs += 1
|
248
|
+
end
|
249
|
+
(" " * last_tabs) + str
|
250
250
|
end
|
251
251
|
}.join("\n")
|
252
252
|
|
@@ -262,5 +262,5 @@ module PubliSci
|
|
262
262
|
string.to_s.split(':').last
|
263
263
|
end
|
264
264
|
|
265
|
-
|
266
|
-
end
|
265
|
+
end
|
266
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Parsers
|
3
|
+
module Base
|
4
|
+
include Enumerable
|
5
|
+
# attr_accessor :dataset_name, :measures, :dimensions, :codes
|
6
|
+
|
7
|
+
def valid?(rec)
|
8
|
+
true
|
9
|
+
end
|
10
|
+
|
11
|
+
def enum_method
|
12
|
+
:each
|
13
|
+
end
|
14
|
+
|
15
|
+
def process_record(rec)
|
16
|
+
rec
|
17
|
+
end
|
18
|
+
|
19
|
+
def each(input)
|
20
|
+
input.send(enum_method).each_with_index do |rec, i|
|
21
|
+
yield process_record(rec), i if valid? rec
|
22
|
+
end
|
23
|
+
end
|
24
|
+
alias_method :each_rec, :each
|
25
|
+
alias_method :each_record, :each
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Parsers
|
3
|
+
class MAF
|
4
|
+
extend Base
|
5
|
+
COLUMN_NAMES = %w{ Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID patient_id sample_id}
|
6
|
+
|
7
|
+
def self.valid?(line)
|
8
|
+
not (line[0] == "#" || line[0..3] == "Hugo")
|
9
|
+
end
|
10
|
+
|
11
|
+
def enum_method
|
12
|
+
:each_line
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.process_record(rec)
|
16
|
+
::CSV.parse(rec, {col_sep: "\t"}).flatten[0..(COLUMN_NAMES.length-3)]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -1,49 +1,49 @@
|
|
1
1
|
module PubliSci
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
module Readers
|
3
|
+
class ARFF
|
4
|
+
include PubliSci::Dataset::DataCube
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
def generate_n3(arff, options={})
|
7
|
+
arff = IO.read(arff) if File.exist? arff
|
8
|
+
options[:no_labels] = true
|
9
|
+
@options = options
|
10
|
+
comps = components(arff)
|
11
|
+
obs = data(arff, comps.keys)
|
12
|
+
generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
|
13
|
+
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
def relation(arff)
|
16
|
+
arff.match(/@relation.+/i).to_a.first.split.last
|
17
|
+
end
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
19
|
+
def components(arff)
|
20
|
+
#still needs support for quoted strings with whitespace
|
21
|
+
h ={}
|
22
|
+
arff.split("\n").select{|lin| lin =~ /^@ATTRIBUTE/i}.map{|line|
|
23
|
+
if line =~ /\{.*}/
|
24
|
+
name = line.match(/\s.*/).to_a.first.strip.split.first
|
25
|
+
type = :coded
|
26
|
+
codes = line.match(/\{.*}/).to_a.first[1..-2].split(',')
|
27
|
+
h[name] = {type: type, codes: codes}
|
28
|
+
else
|
29
|
+
name = line.split[1]
|
30
|
+
type = line.split[2]
|
31
|
+
h[name] = {type: type}
|
32
|
+
end
|
33
|
+
}
|
34
|
+
h
|
35
|
+
end
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
37
|
+
def data(arff, attributes)
|
38
|
+
lines = arff.split("\n")
|
39
|
+
data_lines = lines[lines.index(lines.select{|line| line =~ /^@DATA/i}.first)+1..-1]
|
40
|
+
h=attributes.inject({}){|ha,attrib| ha[attrib] = []; ha}
|
41
|
+
data_lines.map{|line|
|
42
|
+
line = line.split ','
|
43
|
+
attributes.each_with_index{|a,i| h[a] << line[i]}
|
44
|
+
}
|
45
|
+
h
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
49
|
end
|
data/lib/publisci/readers/csv.rb
CHANGED
data/lib/publisci/readers/maf.rb
CHANGED
@@ -1,199 +1,33 @@
|
|
1
1
|
module PubliSci
|
2
2
|
module Readers
|
3
|
-
class MAF
|
4
|
-
|
3
|
+
class MAF
|
4
|
+
extend PubliSci::Readers::Base
|
5
5
|
|
6
|
-
|
6
|
+
def self.generate_n3(input_file, options={})
|
7
|
+
input_file = open(input_file,'r')
|
7
8
|
|
8
|
-
|
9
|
-
{
|
10
|
-
"Variant_Classification" => %w{Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins Missense_Mutation Nonsense_Mutation Silent Splice_Site Translation_Start_Site Nonstop_Mutation 3'UTR 3'Flank 5'UTR 5'Flank IGR1 Intron RNA Targeted_Region},
|
11
|
-
"Variant_Type" => %w{SNP DNP TNP ONP INS DEL Consolidated},
|
12
|
-
"dbSNP_Val_Status" => %w{by1000genomes by2Hit2Allele byCluster byFrequency byHapMap byOtherPop bySubmitter alternate_allele},
|
13
|
-
"Verification_Status" => %w{Verified, Unknown},
|
14
|
-
"Validation_Status" => %w{Untested Inconclusive Valid Invalid},
|
15
|
-
"Mutation_Status" => %w{None Germline Somatic LOH Post-transcriptional modification Unknown},
|
16
|
-
"Sequence_Source" => %w{WGS WGA WXS RNA-Seq miRNA-Seq Bisulfite-Seq VALIDATION Other ncRNA-Seq WCS CLONE POOLCLONE AMPLICON CLONEEND FINISHING ChIP-Seq MNase-Seq DNase-Hypersensitivity EST FL-cDNA CTS MRE-Seq MeDIP-Seq MBD-Seq Tn-Seq FAIRE-seq SELEX RIP-Seq ChIA-PET},
|
17
|
-
"Sequencer" => ["Illumina GAIIx", "Illumina HiSeq", "SOLID", "454", "ABI 3730xl", "Ion Torrent PGM", "Ion Torrent Proton", "PacBio RS", "Illumina MiSeq", "Illumina HiSeq 2500", "454 GS FLX Titanium", "AB SOLiD 4 System" ]
|
18
|
-
}
|
9
|
+
out_base = options[:output_base] || File.basename(input_file,'.*')
|
19
10
|
|
20
|
-
|
21
|
-
|
22
|
-
dataset_name = options[:dataset_name] || nil
|
23
|
-
output = options[:output] || :file
|
24
|
-
output_base = options[:output_base] || nil
|
25
|
-
|
26
|
-
@dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}
|
27
|
-
# @codes = %w{Variant_Classification Variant_Type}
|
28
|
-
@codes = @dimensions
|
29
|
-
@measures = (COLUMN_NAMES - @dimensions - @codes)
|
30
|
-
@dataset_name ||= File.basename(input_file,'.*')
|
31
|
-
@barcode_index = COLUMN_NAMES.index('Tumor_Sample_Barcode')
|
32
|
-
|
33
|
-
options[:no_labels] ||= true
|
34
|
-
options[:lookup_hugo] ||= false
|
35
|
-
options[:complex_objects] ||= false
|
36
|
-
options[:ranges] ||= COMPONENT_RANGES
|
37
|
-
|
38
|
-
|
39
|
-
if output == :print
|
40
|
-
str = structure(options)
|
41
|
-
f = open(input_file)
|
42
|
-
n = 0
|
43
|
-
f.each_line{|line|
|
44
|
-
processed = process_line(line,n.to_s,options)
|
45
|
-
str << processed.first if processed
|
46
|
-
n +=1
|
47
|
-
}
|
48
|
-
str
|
11
|
+
if options[:output] == :print
|
12
|
+
output = StringIO.new("")
|
49
13
|
else
|
50
|
-
|
51
|
-
# open("#{file_base}_structure.ttl",'w'){|f| f.write structure(options)}
|
52
|
-
file_base = output_base || @dataset_name
|
53
|
-
|
54
|
-
out = open("#{file_base}.ttl",'w')
|
55
|
-
out.write(structure(options))
|
56
|
-
f = open(input_file)
|
57
|
-
n = 0
|
58
|
-
f.each_line{|line|
|
59
|
-
processed = process_line(line,n.to_s,options)
|
60
|
-
out.write(processed.first) if processed
|
61
|
-
n += 1
|
62
|
-
}
|
63
|
-
if options[:lookup_hugo]
|
64
|
-
post_process(out)
|
65
|
-
else
|
66
|
-
out
|
67
|
-
end
|
14
|
+
output = open "#{out_base}.ttl",'w'
|
68
15
|
end
|
69
|
-
end
|
70
|
-
|
71
|
-
def process_line(line,label,options)
|
72
|
-
unless line[0] == "#" || line[0..3] == "Hugo"
|
73
|
-
entry = ::CSV.parse(line, {col_sep: "\t"}).flatten[0..(COLUMN_NAMES.length-3)]
|
74
|
-
|
75
|
-
entry = (entry.fill(nil,entry.length...COLUMN_NAMES.length-2) + parse_barcode(entry[@barcode_index])).flatten
|
76
|
-
|
77
|
-
entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]
|
78
16
|
|
79
|
-
|
80
|
-
col=1
|
81
|
-
entry[col] = nil if entry[col] == '0'
|
82
|
-
entry[col] = "http://identifiers.org/ncbigene/#{entry[col]}" if entry[col]
|
17
|
+
PubliSci::Generators::MAF.write_structure(input_file, output, options)
|
83
18
|
|
84
|
-
|
85
|
-
|
86
|
-
if entry[col] && entry[col][0..1] == "rs"
|
87
|
-
entry[col] = "http://identifiers.org/dbsnp/#{entry[col].gsub('rs','')}"
|
88
|
-
end
|
89
|
-
|
90
|
-
# optionally create typed objects using sio nodes
|
91
|
-
if options[:complex_objects]
|
92
|
-
entry = sio_values(entry)
|
93
|
-
end
|
94
|
-
|
95
|
-
data = {}
|
96
|
-
COLUMN_NAMES.each_with_index{|col,i|
|
97
|
-
data[col] = [entry[i]]
|
98
|
-
}
|
99
|
-
|
100
|
-
observations(@measures,@dimensions,@codes,data,[label],@dataset_name,options)
|
19
|
+
PubliSci::Parsers::MAF.each_record(input_file) do |rec, label|
|
20
|
+
PubliSci::Generators::MAF.write(rec, output, label, options)
|
101
21
|
end
|
102
|
-
end
|
103
|
-
|
104
|
-
def sio_values(entry)
|
105
|
-
entry[0] = sio_value('http://edamontology.org/data_1791',entry[0]) if entry[0]
|
106
|
-
|
107
|
-
# Link entrez genes
|
108
|
-
col=1
|
109
|
-
entry[col] = sio_value("http://identifiers.org/ncbigene",entry[col]) if entry[col]
|
110
|
-
|
111
|
-
col = COLUMN_NAMES.index('dbSNP_RS')
|
112
|
-
entry[col] = sio_value("http://identifiers.org/dbsnp", entry[col])
|
113
|
-
|
114
|
-
# test SIO attributes for chromosome
|
115
|
-
col = COLUMN_NAMES.index('Chromosome')
|
116
|
-
entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0000340",entry[col])
|
117
|
-
|
118
|
-
|
119
22
|
|
120
|
-
|
121
|
-
%w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each{|name|
|
122
|
-
col = COLUMN_NAMES.index(name)
|
123
|
-
entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0001023",entry[col])
|
124
|
-
}
|
23
|
+
output.close
|
125
24
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
col = COLUMN_NAMES.index("Center")
|
130
|
-
entry[col] = sio_attribute("foaf:homepage",entry[col])
|
131
|
-
# entry[col] = [
|
132
|
-
# ["a", "foaf:Organization"],
|
133
|
-
# ["foaf:homepage", entry[col]],
|
134
|
-
# ]
|
135
|
-
|
136
|
-
# Use faldo for locations End_Position
|
137
|
-
col = COLUMN_NAMES.index("Start_Position")
|
138
|
-
entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#begin", entry[col],"http://biohackathon.org/resource/faldo#Position")
|
139
|
-
|
140
|
-
col = COLUMN_NAMES.index("End_Position")
|
141
|
-
entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#end", entry[col],"http://biohackathon.org/resource/faldo#Position")
|
142
|
-
|
143
|
-
entry
|
144
|
-
end
|
145
|
-
|
146
|
-
def column_replace(entry,column,prefix,value=nil)
|
147
|
-
if value
|
148
|
-
entry[COLUMN_NAMES.index(column)] = prefix + value
|
25
|
+
if options[:output] == :print
|
26
|
+
output.string
|
149
27
|
else
|
150
|
-
|
28
|
+
output.path
|
151
29
|
end
|
152
30
|
end
|
153
|
-
|
154
|
-
def official_symbol(hugo_symbol)
|
155
|
-
qry = <<-EOF
|
156
|
-
|
157
|
-
SELECT distinct ?official where {
|
158
|
-
{?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
|
159
|
-
UNION
|
160
|
-
{?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}
|
161
|
-
|
162
|
-
?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
|
163
|
-
}
|
164
|
-
|
165
|
-
EOF
|
166
|
-
|
167
|
-
sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
|
168
|
-
sparql.query(qry).map(&:official).first.to_s
|
169
|
-
end
|
170
|
-
|
171
|
-
def parse_barcode(code)
|
172
|
-
#TCGA-E9-A22B-01A-11D-A159-09
|
173
|
-
[code[5..11], code[13..-1]]
|
174
|
-
end
|
175
|
-
|
176
|
-
def structure(options={})
|
177
|
-
|
178
|
-
str = prefixes(@dataset_name,options)
|
179
|
-
str << data_structure_definition(@measures,@dimensions,@codes,@dataset_name,options)
|
180
|
-
str << dataset(@dataset_name,options)
|
181
|
-
component_specifications(@measures, @dimensions, @codes, @dataset_name, options).map{ |c| str << c }
|
182
|
-
measure_properties(@measures,@dataset_name,options).map{|m| str << m}
|
183
|
-
dimension_properties(@dimensions,@codes, @dataset_name,options).map{|d| str << d}
|
184
|
-
code_lists(@codes,TCGA_CODES,@dataset_name,options).map{|c| str << c}
|
185
|
-
concept_codes(@codes,TCGA_CODES,@dataset_name,options).map{|c| str << c}
|
186
|
-
str
|
187
|
-
end
|
188
|
-
|
189
|
-
def post_process(file)
|
190
|
-
reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}
|
191
|
-
@@hugo_cache ||= {}
|
192
|
-
PubliSci::PostProcessor.process(file,file,reg){|g|
|
193
|
-
@@hugo_cache[g] ||= official_symbol(g)
|
194
|
-
'http://identifiers.org/hgnc.symbol/' + cache[g]
|
195
|
-
}
|
196
|
-
end
|
197
31
|
end
|
198
32
|
end
|
199
33
|
end
|