publisci 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -2
- data/LICENSE.txt +19 -17
- data/README.md +41 -8
- data/README.rdoc +3 -5
- data/Rakefile +2 -2
- data/bin/publisci +9 -7
- data/examples/visualization/prov_viz.rb +1 -1
- data/lib/publisci.rb +19 -11
- data/lib/publisci/datacube_model.rb +2 -2
- data/lib/publisci/dataset/ORM/data_cube_orm.rb +2 -2
- data/lib/publisci/dataset/data_cube.rb +1 -1
- data/lib/publisci/dataset/dataset_for.rb +6 -1
- data/lib/publisci/dataset/interactive.rb +1 -46
- data/lib/publisci/generators/base.rb +22 -0
- data/lib/publisci/generators/maf.rb +172 -0
- data/lib/publisci/metadata/generator.rb +1 -1
- data/lib/publisci/parser.rb +62 -62
- data/lib/publisci/parsers/base.rb +29 -0
- data/lib/publisci/parsers/maf.rb +20 -0
- data/lib/publisci/readers/arff.rb +43 -43
- data/lib/publisci/readers/base.rb +2 -2
- data/lib/publisci/readers/csv.rb +2 -1
- data/lib/publisci/readers/maf.rb +15 -181
- data/lib/publisci/readers/r_matrix.rb +143 -143
- data/lib/publisci/writers/arff.rb +1 -1
- data/lib/publisci/writers/base.rb +1 -1
- data/resources/maf_rdf.ttl +98 -22
- data/spec/ORM/data_cube_orm_spec.rb +1 -1
- data/spec/ORM/prov_model_spec.rb +3 -3
- data/spec/dataset_for_spec.rb +1 -1
- data/spec/generators/maf_spec.rb +2 -1
- data/spec/maf_query_spec.rb +1 -1
- metadata +25 -23
- data/lib/r2rdf.rb +0 -226
- data/lib/template_bak.rb +0 -12
data/lib/publisci/parser.rb
CHANGED
@@ -1,5 +1,5 @@
 module PubliSci
-
+module RDFParser

 def is_uri?(obj)
 RDF::Resource(obj).valid?
@@ -38,61 +38,61 @@ module PubliSci
 h
 end

-
-
-
-
-
-
-
-
+def load_string(string,repo=RDF::Repository.new)
+f = Tempfile.new('repo')
+f.write(string)
+f.close
+repo.load(f.path, :format => :ttl)
+f.unlink
+repo
+end

-
+def get_ary(query_results,method='to_s')
 query_results.map{|solution|
 solution.to_a.map{|entry|
 if entry.last.respond_to? method
-
-
-
-
+entry.last.send(method)
+else
+entry.last.to_s
+end
 }
 }
 end

 def get_hashes(query_results,method=nil)
-
-
-
-
-
-
-
-
-
-
-
-
-
+arr=[]
+query_results.map{|solution|
+h={}
+solution.map{|element|
+if method && element[1].respond_to?(method)
+h[element[0]] = element[1].send(method)
+else
+h[element[0]] = element[1]
+end
+}
+arr << h
+}
+arr
 end

 def observation_hash(query_results,shorten_uris=false,method='to_s')
-
-
-
-
+h={}
+query_results.map{|sol|
+(h[sol[:observation].to_s] ||= {})[sol[:property].to_s] = sol[:value].to_s
+}

-
-
-
-
-
-
-
-
-
-
-
-
+if shorten_uris
+newh= {}
+h.map{|k,v|
+newh[strip_uri(k)] ||= {}
+v.map{|kk,vv|
+newh[strip_uri(k)][strip_uri(kk)] = strip_uri(vv)
+}
+}
+newh
+else
+h
+end
 end

 def to_resource(obj, options={})
@@ -162,7 +162,7 @@ module PubliSci
 to_resource(obj,options)
 elsif obj && obj.is_a?(String) && (obj[0]=="<" && obj[-1] = ">")
 obj
-elsif obj.is_a?(Array)
+elsif obj.is_a?(Array)
 node_str = add_node(node_index,node_str)
 ["#{node_str}" ] + [bnode_value(obj, node_index, node_str, options)]
 else
@@ -179,7 +179,7 @@ module PubliSci
 if obj.size == 2
 if obj[0].is_a?(String)
 if is_complex?(obj[1])
-str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
+str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
 subnodes << encode_value(obj[1], options, node_index, node_str)
 else
 str << "#{to_resource(obj[0])} #{encode_value(obj[1], options, node_index, node_str)} "
@@ -220,7 +220,7 @@ module PubliSci
 raise "Invalid Structured value: #{obj}"
 end

-if subnodes.size > 0
+if subnodes.size > 0
 [str, subnodes.flatten].flatten
 else
 str
@@ -231,22 +231,22 @@ module PubliSci
 tabs = 0
 turtle_str.split("\n").map{|str|
 case str[-1]
-
-
-tabs = 0
-(" " * last_tabs) + str
-when ";"
-last_tabs = tabs
-tabs = 1 if tabs == 0
-(" " * last_tabs) + str
-else
-last_tabs = tabs
-if str.size < 2
+when "."
+last_tabs = tabs
 tabs = 0
+(" " * last_tabs) + str
+when ";"
+last_tabs = tabs
+tabs = 1 if tabs == 0
+(" " * last_tabs) + str
 else
-
-
-
+last_tabs = tabs
+if str.size < 2
+tabs = 0
+else
+tabs += 1
+end
+(" " * last_tabs) + str
 end
 }.join("\n")

@@ -262,5 +262,5 @@ module PubliSci
 string.to_s.split(':').last
 end

-
-end
+end
+end
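The new load_string helper above round-trips a Turtle string through a Tempfile so it can be loaded with RDF::Repository#load. A minimal usage sketch, assuming the gem's own requires (rdf, rdf-turtle, tempfile) are in place; the TurtleHelper class name is illustrative, not part of the gem:

    require 'publisci'

    # Hypothetical wrapper class that mixes in the renamed RDFParser module
    class TurtleHelper
      include PubliSci::RDFParser
    end

    ttl = '<http://example.org/a> <http://example.org/b> <http://example.org/c> .'
    repo = TurtleHelper.new.load_string(ttl)   # returns an RDF::Repository
    puts repo.count                            # => 1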
data/lib/publisci/parsers/base.rb
ADDED
@@ -0,0 +1,29 @@
+module PubliSci
+module Parsers
+module Base
+include Enumerable
+# attr_accessor :dataset_name, :measures, :dimensions, :codes
+
+def valid?(rec)
+true
+end
+
+def enum_method
+:each
+end
+
+def process_record(rec)
+rec
+end
+
+def each(input)
+input.send(enum_method).each_with_index do |rec, i|
+yield process_record(rec), i if valid? rec
+end
+end
+alias_method :each_rec, :each
+alias_method :each_record, :each
+
+end
+end
+end
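Parsers::Base factors record iteration out of the readers: each walks the input using enum_method, skips records that fail valid?, and yields the result of process_record together with its index. A hypothetical parser built on it might look like the sketch below (the TSV class and data.tsv file are illustrative only, not part of the gem):

    require 'publisci'

    module PubliSci
      module Parsers
        class TSV
          extend Base

          def self.valid?(line)
            line[0] != '#'            # skip comment lines
          end

          def self.enum_method
            :each_line                # iterate the input line by line
          end

          def self.process_record(line)
            line.chomp.split("\t")    # yield each record as an array of fields
          end
        end
      end
    end

    PubliSci::Parsers::TSV.each_record(File.open('data.tsv')) do |fields, i|
      puts "#{i}: #{fields.inspect}"
    end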
data/lib/publisci/parsers/maf.rb
ADDED
@@ -0,0 +1,20 @@
+module PubliSci
+module Parsers
+class MAF
+extend Base
+COLUMN_NAMES = %w{ Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID patient_id sample_id}
+
+def self.valid?(line)
+not (line[0] == "#" || line[0..3] == "Hugo")
+end
+
+def enum_method
+:each_line
+end
+
+def self.process_record(rec)
+::CSV.parse(rec, {col_sep: "\t"}).flatten[0..(COLUMN_NAMES.length-3)]
+end
+end
+end
+end
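On its own, the new MAF parser yields each data row as an array truncated to the standard column set, skipping comment and header lines. An illustrative sketch (mutations.maf is a hypothetical file name):

    require 'publisci'

    File.open('mutations.maf') do |maf|
      PubliSci::Parsers::MAF.each_record(maf) do |fields, i|
        puts "#{i}: #{fields[0]}"   # fields[0] holds the Hugo_Symbol column
      end
    end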
data/lib/publisci/readers/arff.rb
CHANGED
@@ -1,49 +1,49 @@
 module PubliSci
-
-
-
+module Readers
+class ARFF
+include PubliSci::Dataset::DataCube

-
-
-
-
-
-
-
-
+def generate_n3(arff, options={})
+arff = IO.read(arff) if File.exist? arff
+options[:no_labels] = true
+@options = options
+comps = components(arff)
+obs = data(arff, comps.keys)
+generate(comps.reject{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, comps.select{|c| comps[c][:codes]}.keys, obs, (1..obs.first[1].size).to_a, relation(arff), options)
+end

-
-
-
+def relation(arff)
+arff.match(/@relation.+/i).to_a.first.split.last
+end

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def components(arff)
+#still needs support for quoted strings with whitespace
+h ={}
+arff.split("\n").select{|lin| lin =~ /^@ATTRIBUTE/i}.map{|line|
+if line =~ /\{.*}/
+name = line.match(/\s.*/).to_a.first.strip.split.first
+type = :coded
+codes = line.match(/\{.*}/).to_a.first[1..-2].split(',')
+h[name] = {type: type, codes: codes}
+else
+name = line.split[1]
+type = line.split[2]
+h[name] = {type: type}
+end
+}
+h
+end

-
-
-
-
-
-
-
-
-
-
-
-
+def data(arff, attributes)
+lines = arff.split("\n")
+data_lines = lines[lines.index(lines.select{|line| line =~ /^@DATA/i}.first)+1..-1]
+h=attributes.inject({}){|ha,attrib| ha[attrib] = []; ha}
+data_lines.map{|line|
+line = line.split ','
+attributes.each_with_index{|a,i| h[a] << line[i]}
+}
+h
+end
+end
+end
 end
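The rewritten ARFF reader turns @ATTRIBUTE declarations into components (nominal attributes become coded dimensions) and the @DATA block into per-column value arrays before handing both to the Data Cube generator. A rough usage sketch with an illustrative ARFF snippet; whether the generated Turtle is returned or written to disk depends on the Data Cube generator's option defaults:

    require 'publisci'

    arff = <<~ARFF
      @RELATION weather
      @ATTRIBUTE outlook {sunny,overcast,rainy}
      @ATTRIBUTE temperature NUMERIC
      @DATA
      sunny,85
      overcast,83
    ARFF

    reader = PubliSci::Readers::ARFF.new
    turtle = reader.generate_n3(arff)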
data/lib/publisci/readers/csv.rb
CHANGED
data/lib/publisci/readers/maf.rb
CHANGED
@@ -1,199 +1,33 @@
 module PubliSci
 module Readers
-class MAF
-
+class MAF
+extend PubliSci::Readers::Base

-
+def self.generate_n3(input_file, options={})
+input_file = open(input_file,'r')

-
-{
-"Variant_Classification" => %w{Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins Missense_Mutation Nonsense_Mutation Silent Splice_Site Translation_Start_Site Nonstop_Mutation 3'UTR 3'Flank 5'UTR 5'Flank IGR1 Intron RNA Targeted_Region},
-"Variant_Type" => %w{SNP DNP TNP ONP INS DEL Consolidated},
-"dbSNP_Val_Status" => %w{by1000genomes by2Hit2Allele byCluster byFrequency byHapMap byOtherPop bySubmitter alternate_allele},
-"Verification_Status" => %w{Verified, Unknown},
-"Validation_Status" => %w{Untested Inconclusive Valid Invalid},
-"Mutation_Status" => %w{None Germline Somatic LOH Post-transcriptional modification Unknown},
-"Sequence_Source" => %w{WGS WGA WXS RNA-Seq miRNA-Seq Bisulfite-Seq VALIDATION Other ncRNA-Seq WCS CLONE POOLCLONE AMPLICON CLONEEND FINISHING ChIP-Seq MNase-Seq DNase-Hypersensitivity EST FL-cDNA CTS MRE-Seq MeDIP-Seq MBD-Seq Tn-Seq FAIRE-seq SELEX RIP-Seq ChIA-PET},
-"Sequencer" => ["Illumina GAIIx", "Illumina HiSeq", "SOLID", "454", "ABI 3730xl", "Ion Torrent PGM", "Ion Torrent Proton", "PacBio RS", "Illumina MiSeq", "Illumina HiSeq 2500", "454 GS FLX Titanium", "AB SOLiD 4 System" ]
-}
+out_base = options[:output_base] || File.basename(input_file,'.*')

-
-
-dataset_name = options[:dataset_name] || nil
-output = options[:output] || :file
-output_base = options[:output_base] || nil
-
-@dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}
-# @codes = %w{Variant_Classification Variant_Type}
-@codes = @dimensions
-@measures = (COLUMN_NAMES - @dimensions - @codes)
-@dataset_name ||= File.basename(input_file,'.*')
-@barcode_index = COLUMN_NAMES.index('Tumor_Sample_Barcode')
-
-options[:no_labels] ||= true
-options[:lookup_hugo] ||= false
-options[:complex_objects] ||= false
-options[:ranges] ||= COMPONENT_RANGES
-
-
-if output == :print
-str = structure(options)
-f = open(input_file)
-n = 0
-f.each_line{|line|
-processed = process_line(line,n.to_s,options)
-str << processed.first if processed
-n +=1
-}
-str
+if options[:output] == :print
+output = StringIO.new("")
 else
-
-# open("#{file_base}_structure.ttl",'w'){|f| f.write structure(options)}
-file_base = output_base || @dataset_name
-
-out = open("#{file_base}.ttl",'w')
-out.write(structure(options))
-f = open(input_file)
-n = 0
-f.each_line{|line|
-processed = process_line(line,n.to_s,options)
-out.write(processed.first) if processed
-n += 1
-}
-if options[:lookup_hugo]
-post_process(out)
-else
-out
-end
+output = open "#{out_base}.ttl",'w'
 end
-end
-
-def process_line(line,label,options)
-unless line[0] == "#" || line[0..3] == "Hugo"
-entry = ::CSV.parse(line, {col_sep: "\t"}).flatten[0..(COLUMN_NAMES.length-3)]
-
-entry = (entry.fill(nil,entry.length...COLUMN_NAMES.length-2) + parse_barcode(entry[@barcode_index])).flatten
-
-entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]

-
-col=1
-entry[col] = nil if entry[col] == '0'
-entry[col] = "http://identifiers.org/ncbigene/#{entry[col]}" if entry[col]
+PubliSci::Generators::MAF.write_structure(input_file, output, options)

-
-
-if entry[col] && entry[col][0..1] == "rs"
-entry[col] = "http://identifiers.org/dbsnp/#{entry[col].gsub('rs','')}"
-end
-
-# optionally create typed objects using sio nodes
-if options[:complex_objects]
-entry = sio_values(entry)
-end
-
-data = {}
-COLUMN_NAMES.each_with_index{|col,i|
-data[col] = [entry[i]]
-}
-
-observations(@measures,@dimensions,@codes,data,[label],@dataset_name,options)
+PubliSci::Parsers::MAF.each_record(input_file) do |rec, label|
+PubliSci::Generators::MAF.write(rec, output, label, options)
 end
-end
-
-def sio_values(entry)
-entry[0] = sio_value('http://edamontology.org/data_1791',entry[0]) if entry[0]
-
-# Link entrez genes
-col=1
-entry[col] = sio_value("http://identifiers.org/ncbigene",entry[col]) if entry[col]
-
-col = COLUMN_NAMES.index('dbSNP_RS')
-entry[col] = sio_value("http://identifiers.org/dbsnp", entry[col])
-
-# test SIO attributes for chromosome
-col = COLUMN_NAMES.index('Chromosome')
-entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0000340",entry[col])
-
-

-
-%w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each{|name|
-col = COLUMN_NAMES.index(name)
-entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0001023",entry[col])
-}
+output.close

-
-
-
-col = COLUMN_NAMES.index("Center")
-entry[col] = sio_attribute("foaf:homepage",entry[col])
-# entry[col] = [
-# ["a", "foaf:Organization"],
-# ["foaf:homepage", entry[col]],
-# ]
-
-# Use faldo for locations End_Position
-col = COLUMN_NAMES.index("Start_Position")
-entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#begin", entry[col],"http://biohackathon.org/resource/faldo#Position")
-
-col = COLUMN_NAMES.index("End_Position")
-entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#end", entry[col],"http://biohackathon.org/resource/faldo#Position")
-
-entry
-end
-
-def column_replace(entry,column,prefix,value=nil)
-if value
-entry[COLUMN_NAMES.index(column)] = prefix + value
+if options[:output] == :print
+output.string
 else
-
+output.path
 end
 end
-
-def official_symbol(hugo_symbol)
-qry = <<-EOF
-
-SELECT distinct ?official where {
-{?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
-UNION
-{?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}
-
-?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
-}
-
-EOF
-
-sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
-sparql.query(qry).map(&:official).first.to_s
-end
-
-def parse_barcode(code)
-#TCGA-E9-A22B-01A-11D-A159-09
-[code[5..11], code[13..-1]]
-end
-
-def structure(options={})
-
-str = prefixes(@dataset_name,options)
-str << data_structure_definition(@measures,@dimensions,@codes,@dataset_name,options)
-str << dataset(@dataset_name,options)
-component_specifications(@measures, @dimensions, @codes, @dataset_name, options).map{ |c| str << c }
-measure_properties(@measures,@dataset_name,options).map{|m| str << m}
-dimension_properties(@dimensions,@codes, @dataset_name,options).map{|d| str << d}
-code_lists(@codes,TCGA_CODES,@dataset_name,options).map{|c| str << c}
-concept_codes(@codes,TCGA_CODES,@dataset_name,options).map{|c| str << c}
-str
-end
-
-def post_process(file)
-reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}
-@@hugo_cache ||= {}
-PubliSci::PostProcessor.process(file,file,reg){|g|
-@@hugo_cache[g] ||= official_symbol(g)
-'http://identifiers.org/hgnc.symbol/' + cache[g]
-}
-end
 end
 end
 end
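After the refactor the reader is a thin pipeline: Parsers::MAF walks the input, Generators::MAF emits the Data Cube structure and one observation per record, and generate_n3 only picks the output target. A hedged usage sketch (mutations.maf and the output_base value are illustrative; output_base is passed explicitly so the output name does not depend on the already-opened file handle):

    require 'publisci'

    # Default mode writes mutations.ttl and returns its path
    path = PubliSci::Readers::MAF.generate_n3('mutations.maf', output_base: 'mutations')

    # output: :print keeps everything in a StringIO and returns the Turtle string
    ttl = PubliSci::Readers::MAF.generate_n3('mutations.maf',
                                             output: :print, output_base: 'mutations')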