bio-publisci 0.0.8 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +10 -0
- data/Rakefile +1 -1
- data/bin/bio-publisci-server +50 -0
- data/features/reader_steps.rb +1 -1
- data/lib/bio-publisci.rb +11 -2
- data/lib/bio-publisci/datacube_model.rb +92 -88
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
- data/lib/bio-publisci/dataset/data_cube.rb +78 -44
- data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
- data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
- data/lib/bio-publisci/output.rb +1 -1
- data/lib/bio-publisci/parser.rb +130 -12
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +13 -8
- data/lib/bio-publisci/readers/arff.rb +1 -1
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +2 -5
- data/lib/bio-publisci/readers/dataframe.rb +2 -2
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +6 -10
- data/lib/bio-publisci/readers/r_matrix.rb +1 -1
- data/lib/bio-publisci/writers/base.rb +16 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +1 -1
- data/scripts/scan_islet.rb +1 -1
- data/scripts/update_reference.rb +8 -3
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +1 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/data_cube_spec.rb +66 -63
- data/spec/dataset_for_spec.rb +36 -16
- data/spec/dsl_spec.rb +41 -0
- data/spec/generators/csv_spec.rb +3 -3
- data/spec/generators/dataframe_spec.rb +2 -2
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +2 -2
- data/spec/generators/r_matrix_spec.rb +2 -2
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/resource/example.Rhistory +1 -1
- data/spec/turtle/bacon +9 -9
- data/spec/turtle/reference +43 -43
- data/spec/turtle/weather +10 -10
- data/spec/writer_spec.rb +16 -2
- metadata +212 -61
@@ -37,10 +37,10 @@ module PubliSci
|
|
37
37
|
# end
|
38
38
|
|
39
39
|
def execute(string,store,type=:fourstore)
|
40
|
-
|
41
|
-
sparql = SPARQL::Client.new(store)
|
42
|
-
elsif store.is_a? PubliSci::Store
|
40
|
+
if store.is_a?(PubliSci::Store) || store.is_a?(RDF::FourStore)
|
43
41
|
sparql = SPARQL::Client.new(store.url+"/sparql/")
|
42
|
+
elsif type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
|
43
|
+
sparql = SPARQL::Client.new(store)
|
44
44
|
elsif type == :fourstore
|
45
45
|
sparql = SPARQL::Client.new(store+"/sparql/")
|
46
46
|
end
|
@@ -48,12 +48,17 @@ module PubliSci
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def execute_from_file(file,store,type=:fourstore,substitutions={})
|
51
|
+
if Gem::Dependency.new('bio-publisci').matching_specs.size > 0
|
52
|
+
queries_dir = Gem::Specification.find_by_name("bio-publisci").gem_dir + "/resources/queries/"
|
53
|
+
else
|
54
|
+
queries_dir = File.dirname(__FILE__) + '/../../../resources/queries/'
|
55
|
+
end
|
51
56
|
if File.exist?(file)
|
52
57
|
string = IO.read(file)
|
53
|
-
elsif File.exist?(
|
54
|
-
string = IO.read(
|
55
|
-
elsif File.exist?(
|
56
|
-
string = IO.read(
|
58
|
+
elsif File.exist?(queries_dir + file)
|
59
|
+
string = IO.read(queries_dir + file)
|
60
|
+
elsif File.exist?(queries_dir + file + '.rq')
|
61
|
+
string = IO.read(queries_dir + file + '.rq')
|
57
62
|
else
|
58
63
|
raise "couldn't find query for #{file}"
|
59
64
|
end
|
@@ -113,6 +118,6 @@ SELECT DISTINCT ?label WHERE {
|
|
113
118
|
end
|
114
119
|
|
115
120
|
class QueryHelper
|
116
|
-
|
121
|
+
extend PubliSci::Query
|
117
122
|
end
|
118
123
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Readers
|
3
|
+
class Base
|
4
|
+
include PubliSci::Query
|
5
|
+
include PubliSci::Parser
|
6
|
+
include PubliSci::Analyzer
|
7
|
+
include PubliSci::Interactive
|
8
|
+
include PubliSci::Dataset::DataCube
|
9
|
+
|
10
|
+
#should be overridden if extra processing/input is required
|
11
|
+
# Default entry point for readers that need no extra processing or input.
# The first argument is handed to generate_n3 as the input; the arguments
# after it, except the final one, are folded pairwise into the options hash.
# NOTE(review): the final argument is always discarded - confirm that this is
# what callers expect.
def automatic(*args)
  input, *rest = args
  option_pairs = rest[0..-2]
  generate_n3(input, Hash[*option_pairs])
end
|
14
|
+
|
15
|
+
# Placeholder for the conversion step. This base implementation always raises
# a RuntimeError naming the receiver; concrete readers supply their own version.
def generate_n3(*args)
  message = "#{self} does not implement a generate_n3 method!"
  raise message
end
|
18
|
+
|
19
|
+
# Builds a two-pair description of a typed value: an "a" (rdf:type) pair for
# +type+ followed by a SIO_000300 pair carrying +value+.
def sio_value(type,value)
  type_pair = ["a", type]
  value_pair = ["http://semanticscience.org/resource/SIO_000300", value]
  [type_pair, value_pair]
end
|
25
|
+
|
26
|
+
# Builds a nested array describing an attribute node: a SIO_000008 pair whose
# object is a SIO_000300 pair holding +value+.
# When +data_type+ is given, the value pair is preceded by an "a" pair for it;
# when +attribute_type+ is given, the attribute pair is preceded by an "a"
# pair for it.
def sio_attribute(attribute_type,value,data_type=nil)
  value_node = ["http://semanticscience.org/resource/SIO_000300", value]
  value_node = [["a", data_type], value_node] if data_type

  attribute_node = ["http://semanticscience.org/resource/SIO_000008", value_node]
  return attribute_node unless attribute_type

  [["a", attribute_type], attribute_node]
end
|
47
|
+
|
48
|
+
# Returns the next value of a per-object counter: 0 on the first call and one
# more than the previous result on every later call.
def next_label
  @__current_label = @__current_label ? @__current_label + 1 : 0
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -1,9 +1,6 @@
|
|
1
1
|
module PubliSci
|
2
|
-
module
|
3
|
-
class CSV
|
4
|
-
include PubliSci::Dataset::DataCube
|
5
|
-
include PubliSci::Interactive
|
6
|
-
|
2
|
+
module Readers
|
3
|
+
class CSV < Base
|
7
4
|
def automatic(file=nil,dataset_name=nil,options={},interactive=true)
|
8
5
|
#to do
|
9
6
|
# puts "f #{file} \n ds #{dataset_name} opts #{options}"
|
@@ -0,0 +1,199 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Readers
|
3
|
+
class MAF < Base
|
4
|
+
# Columns of an entry as used by this reader: the first 34 MAF columns plus
# two derived ones (patient_id, sample_id) taken from the tumor sample barcode.
COLUMN_NAMES = %w{ Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID patient_id sample_id}

# Default range (datatype) per component, passed on as options[:ranges].
# Keys must match COLUMN_NAMES exactly; "Start_Position" was previously spelt
# "Start_position" and therefore never matched its column.
# NOTE(review): "Chromosome" is declared xsd:int, which does not fit values
# such as X, Y or MT - confirm before relying on it.
COMPONENT_RANGES = { "Tumor_Sample_Barcode" => "xsd:string", "Start_Position" => "xsd:int", "Center" => "xsd:string", "NCBI_Build" => "xsd:int", "Chromosome" => "xsd:int" }

# Permitted codes for each coded dimension.
TCGA_CODES =
  {
    # "IGR" replaces "IGR1", which looks like the code fused with a footnote marker.
    "Variant_Classification" => %w{Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins Missense_Mutation Nonsense_Mutation Silent Splice_Site Translation_Start_Site Nonstop_Mutation 3'UTR 3'Flank 5'UTR 5'Flank IGR Intron RNA Targeted_Region},
    "Variant_Type" => %w{SNP DNP TNP ONP INS DEL Consolidated},
    "dbSNP_Val_Status" => %w{by1000genomes by2Hit2Allele byCluster byFrequency byHapMap byOtherPop bySubmitter alternate_allele},
    # No comma inside %w{}: it would become part of the first code ("Verified,").
    "Verification_Status" => %w{Verified Unknown},
    "Validation_Status" => %w{Untested Inconclusive Valid Invalid},
    # Written as a plain array because one code contains a space, which %w{} would split.
    "Mutation_Status" => ["None", "Germline", "Somatic", "LOH", "Post-transcriptional modification", "Unknown"],
    "Sequence_Source" => %w{WGS WGA WXS RNA-Seq miRNA-Seq Bisulfite-Seq VALIDATION Other ncRNA-Seq WCS CLONE POOLCLONE AMPLICON CLONEEND FINISHING ChIP-Seq MNase-Seq DNase-Hypersensitivity EST FL-cDNA CTS MRE-Seq MeDIP-Seq MBD-Seq Tn-Seq FAIRE-seq SELEX RIP-Seq ChIA-PET},
    "Sequencer" => ["Illumina GAIIx", "Illumina HiSeq", "SOLID", "454", "ABI 3730xl", "Ion Torrent PGM", "Ion Torrent Proton", "PacBio RS", "Illumina MiSeq", "Illumina HiSeq 2500", "454 GS FLX Titanium", "AB SOLiD 4 System" ]
  }
|
19
|
+
|
20
|
+
# Converts a MAF file into turtle.
#
# input_file - path of the MAF file (opened with Kernel#open).
# options    - :dataset_name  name used in the output (default: basename of
#                             input_file without extension)
#              :output        :print returns the turtle as a String; anything
#                             else writes "<output_base>.ttl" (default :file)
#              :output_base   base name of the output file (default: dataset name)
#              :no_labels     defaults to true; an explicit false is respected
#              :lookup_hugo   run post_process on the written file (default false)
#              :complex_objects  wrap values using sio_values (default false)
#              :ranges        component ranges (default COMPONENT_RANGES)
#
# Returns the turtle String for :print; otherwise the still-open output File,
# or the result of post_process when :lookup_hugo is set.
def generate_n3(input_file, options={})
  # Work on a copy so the defaults filled in below never leak into the caller's hash.
  options = options.dup

  output = options[:output] || :file
  output_base = options[:output_base]

  @dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}
  # @codes = %w{Variant_Classification Variant_Type}
  @codes = @dimensions
  @measures = (COLUMN_NAMES - @dimensions - @codes)
  # Assigned afresh on every call: honours :dataset_name and stops a reused
  # reader from carrying over the name of a previously converted file.
  @dataset_name = options[:dataset_name] || File.basename(input_file,'.*')
  @barcode_index = COLUMN_NAMES.index('Tumor_Sample_Barcode')

  # `||= true` would turn an explicit false back into true.
  options[:no_labels] = true unless options.key?(:no_labels)
  options[:lookup_hugo] ||= false
  options[:complex_objects] ||= false
  options[:ranges] ||= COMPONENT_RANGES

  if output == :print
    str = structure(options)
    each_observation_chunk(input_file, options) { |turtle| str << turtle }
    str
  else
    # TODO - allow multi file / separate structure output for very large datasets
    # open("#{file_base}_structure.ttl",'w'){|f| f.write structure(options)}
    file_base = output_base || @dataset_name

    out = open("#{file_base}.ttl",'w')
    out.write(structure(options))
    each_observation_chunk(input_file, options) { |turtle| out.write(turtle) }
    # Push buffered turtle to disk so the file is complete for whoever reads it
    # next; the handle itself stays open because it is the return value.
    out.flush

    options[:lookup_hugo] ? post_process(out) : out
  end
end

# Yields the first element of process_line's result for every line of
# input_file that process_line does not skip. Labels are the zero-based line
# numbers (skipped lines still consume a number). The input is closed afterwards.
def each_observation_chunk(input_file, options)
  open(input_file) do |f|
    f.each_line.with_index do |line, n|
      processed = process_line(line, n.to_s, options)
      yield processed.first if processed
    end
  end
end
private :each_observation_chunk
|
70
|
+
|
71
|
+
# Turns one line of a MAF file into observations.
#
# line    - raw tab-separated line.
# label   - label for the resulting observation (passed on as [label]).
# options - reader options; :complex_objects switches on sio_values.
#
# Returns nil for blank lines, "#" comment lines and the "Hugo..." header
# line; otherwise whatever `observations` returns.
def process_line(line,label,options)
  # Blank lines carry no barcode and used to crash in parse_barcode(nil).
  return if line.strip.empty? || line[0] == "#" || line[0..3] == "Hugo"

  # Keyword form of col_sep: a positional options hash is rejected by CSV.parse on Ruby 3.
  entry = ::CSV.parse(line, col_sep: "\t").flatten[0..(COLUMN_NAMES.length-3)]

  # Pad short rows with nil, then append the two barcode-derived columns
  # (patient_id, sample_id) that end COLUMN_NAMES.
  entry.fill(nil,entry.length...COLUMN_NAMES.length-2)
  barcode = entry[@barcode_index]
  entry = (entry + (barcode ? parse_barcode(barcode) : [nil, nil])).flatten

  entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]

  # A 0 in the entrez-id column appears to mean null
  col=1
  entry[col] = nil if entry[col] == '0'
  entry[col] = "http://identifiers.org/ncbigene/#{entry[col]}" if entry[col]

  # Only link non-novel dbSNP entries
  col = COLUMN_NAMES.index('dbSNP_RS')
  if entry[col] && entry[col][0..1] == "rs"
    # Drop only the leading "rs"; gsub would also eat any later occurrence.
    entry[col] = "http://identifiers.org/dbsnp/#{entry[col][2..-1]}"
  end

  # optionally create typed objects using sio nodes
  entry = sio_values(entry) if options[:complex_objects]

  data = {}
  COLUMN_NAMES.each_with_index{|name,i|
    data[name] = [entry[i]]
  }

  observations(@measures,@dimensions,@codes,data,[label],@dataset_name,options)
end
|
103
|
+
|
104
|
+
# Replaces selected columns of +entry+ (in place) with typed SIO structures
# built by sio_value / sio_attribute, and returns the same array.
def sio_values(entry)
  # Gene symbol and Entrez gene id are the first two columns; they are only
  # wrapped when present.
  entry[0] = sio_value('http://edamontology.org/data_1791',entry[0]) if entry[0]
  entry[1] = sio_value("http://identifiers.org/ncbigene",entry[1]) if entry[1]

  # Columns wrapped as typed values, whatever they currently hold.
  typed_values = {
    'dbSNP_RS' => "http://identifiers.org/dbsnp",
    # test SIO attributes for chromosome
    'Chromosome' => "http://purl.org/obo/owl/SO#SO_0000340"
  }
  # More SIO attributes for alleles
  %w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each{|name|
    typed_values[name] = "http://purl.org/obo/owl/SO#SO_0001023"
  }
  typed_values.each{|name, type|
    idx = COLUMN_NAMES.index(name)
    entry[idx] = sio_value(type,entry[idx])
  }

  # Columns wrapped as attributes: [column, attribute type, data type].
  # Center could alternatively be modelled as a foaf:Organization with a
  # foaf:homepage; locations use faldo begin/end positions.
  attribute_columns = [
    ["Strand", "http://edamontology.org/data_0853", nil],
    ["Center", "foaf:homepage", nil],
    ["Start_Position", "http://biohackathon.org/resource/faldo#begin", "http://biohackathon.org/resource/faldo#Position"],
    ["End_Position", "http://biohackathon.org/resource/faldo#end", "http://biohackathon.org/resource/faldo#Position"]
  ]
  attribute_columns.each{|name, attribute_type, data_type|
    idx = COLUMN_NAMES.index(name)
    entry[idx] = sio_attribute(attribute_type, entry[idx], data_type)
  }

  entry
end
|
145
|
+
|
146
|
+
# Rewrites the cell of +entry+ belonging to +column+ and returns the new cell.
# With +value+ the cell becomes prefix + value; without it, +prefix+ is
# appended to the cell's current content.
# NOTE(review): appending something called "prefix" looks unintended - confirm
# with callers before relying on the value-less form.
def column_replace(entry,column,prefix,value=nil)
  idx = COLUMN_NAMES.index(column)
  entry[idx] = value ? prefix + value : entry[idx] + prefix
end
|
153
|
+
|
154
|
+
# Looks up the approved HGNC symbol for +hugo_symbol+ by querying the SPARQL
# endpoint at http://cu.hgnc.bio2rdf.org/sparql. The query matches records
# whose approved symbol or synonym equals +hugo_symbol+ and selects their
# approved symbol.
#
# Returns the first ?official binding converted with to_s; presumably this is
# "" when the query yields no solutions (nil.to_s) - verify against the client.
# NOTE(review): +hugo_symbol+ is interpolated into the query unescaped, so it
# must not contain double quotes.
def official_symbol(hugo_symbol)
  qry = <<-EOF

  SELECT distinct ?official where {
  {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
  UNION
  {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}

  ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
  }

  EOF

  sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
  sparql.query(qry).map(&:official).first.to_s
end
|
170
|
+
|
171
|
+
# Splits a sample barcode by fixed character positions into two parts:
# characters 5..11 and everything from character 13 onwards.
# Example: "TCGA-E9-A22B-01A-11D-A159-09" => ["E9-A22B", "01A-11D-A159-09"]
def parse_barcode(code)
  patient_part = code[5..11]
  sample_part = code[13..-1]
  [patient_part, sample_part]
end
|
175
|
+
|
176
|
+
# Assembles the non-observation part of the output as one String, in this
# order: prefixes, data structure definition, dataset, component
# specifications, measure properties, dimension properties, code lists and
# concept codes (the last two built from TCGA_CODES).
def structure(options={})
  name = @dataset_name

  turtle = prefixes(name,options)
  turtle << data_structure_definition(@measures,@dimensions,@codes,name,options)
  turtle << dataset(name,options)

  component_specifications(@measures, @dimensions, @codes, name, options).each{ |spec| turtle << spec }
  measure_properties(@measures,name,options).each{ |prop| turtle << prop }
  dimension_properties(@dimensions,@codes, name,options).each{ |prop| turtle << prop }
  code_lists(@codes,TCGA_CODES,name,options).each{ |list| turtle << list }
  concept_codes(@codes,TCGA_CODES,name,options).each{ |code| turtle << code }

  turtle
end
|
188
|
+
|
189
|
+
# Rewrites HGNC symbol URIs in +file+ so they use the official symbol.
# PubliSci::PostProcessor.process is given +file+ as both of its first two
# arguments together with the URI pattern; the block receives the symbol and
# returns the replacement URI.
#
# Lookups go through official_symbol and are cached per class, so each symbol
# is resolved at most once. When the lookup comes back empty the original
# symbol is kept, so the URI is never truncated.
#
# Returns whatever PubliSci::PostProcessor.process returns.
def post_process(file)
  reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}
  PubliSci::PostProcessor.process(file,file,reg){|g|
    # The original read from an undefined local `cache` here (NameError).
    hugo_cache[g] ||= begin
      official = official_symbol(g).to_s
      official.empty? ? g : official
    end
    'http://identifiers.org/hgnc.symbol/' + hugo_cache[g]
  }
end

# Symbol => official symbol cache, stored as an instance variable on the
# reader's class (shared by its instances, not by the whole inheritance tree
# as a @@ class variable would be).
def hugo_cache
  owner = self.class
  owner.instance_variable_get(:@hugo_cache) || owner.instance_variable_set(:@hugo_cache, {})
end
private :hugo_cache
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module PubliSci
|
2
|
-
module
|
2
|
+
module Readers
|
3
3
|
class RCross
|
4
4
|
include PubliSci::Dataset::DataCube
|
5
|
-
include PubliSci::
|
5
|
+
include PubliSci::Readers::Output
|
6
6
|
|
7
7
|
def generate_n3(client, var, outfile_base, options={})
|
8
8
|
meas = measures(client,var,options)
|
@@ -75,33 +75,29 @@ module PubliSci
|
|
75
75
|
|
76
76
|
# Collects the observation columns for one individual on one chromosome.
#
# Returns a Hash of flat arrays keyed by "chr", "genotype", "individual",
# "marker", "markerpos" and by every name returned from names(<var>$pheno).
# "individual" and the phenotype columns repeat their value
# entries_per_individual times; "chr" repeats +chr+ once per column of the
# first payload element of +geno_chr+.
# NOTE(review): +client+ and +geno_chr+ are presumably Rserve objects - their
# exact types are not visible here.
def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
  data = {}
  %w{chr genotype individual marker markerpos}.each{|key| data[key] = [] }

  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  pheno_names.each{|name| data[name] = [] }

  data["individual"] << (1..entries_per_individual).to_a.fill(row_individ)
  pheno_names.each{|name|
    repeated = (1..entries_per_individual).to_a
    data[name] << repeated.fill(client.eval("#{var}$pheno$#{name}").to_ruby[row_individ])
  }

  num_markers = geno_chr.payload.first.to_ruby.column_size
  data["chr"] << (1..num_markers).to_a.fill(chr)
  data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
  data["markerpos"] << geno_chr.payload["map"].to_a

  # Every column was built as an array of arrays; flatten each in place.
  data.each_value{|values| values.flatten! }
  data
end
|
@@ -63,6 +63,7 @@ module PubliSci
|
|
63
63
|
|
64
64
|
def codes(input, data_set = nil, select = :label)
|
65
65
|
repo = handle_input(input)
|
66
|
+
|
66
67
|
if data_set
|
67
68
|
codes = execute_from_file("codes.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"}).to_h
|
68
69
|
else
|
@@ -72,6 +73,21 @@ module PubliSci
|
|
72
73
|
(h[el.first]||=[]) << el.last; h
|
73
74
|
}
|
74
75
|
end
|
76
|
+
|
77
|
+
# Loads +turtle_file+ into an RDF::Repository and converts it with
# repo_to_ruby, passing +select_dataset+ and +shorten_url+ through unchanged.
def turtle_to_ruby(turtle_file, select_dataset=nil, shorten_url=true)
  repo_to_ruby(RDF::Repository.load(turtle_file), select_dataset, shorten_url)
end
|
82
|
+
|
83
|
+
# Converts a repository into a Hash with the keys :measures, :dimensions,
# :coded_dimensions and :data, each filled by the like-named query helper.
# When +select_dataset+ is not given it is obtained from dataSet(repo, :dataset).
def repo_to_ruby(repo,select_dataset=nil, shorten_url=true)
  select_dataset ||= dataSet(repo,:dataset)

  dims = dimensions(repo,select_dataset)
  meas = measures(repo,select_dataset)
  coded = codes(repo,select_dataset)
  rows = observations(repo,select_dataset,shorten_url)

  {measures: meas, dimensions: dims, coded_dimensions: coded, data: rows}
end
|
75
91
|
end
|
76
92
|
end
|
77
93
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Writers
|
3
|
+
class JSON < Base
|
4
|
+
# Serialises the values of +data+ (its keys are dropped) with to_json.
def build_json(data)
  rows = data.values
  rows.to_json
end
|
7
|
+
|
8
|
+
# Converts a turtle file to JSON: runs turtle_to_ruby with the given
# arguments and serialises the :data part of its result via build_json.
def from_turtle(file,select_dataset=nil,shorten_url=true)
  build_json(turtle_to_ruby(file,select_dataset,shorten_url)[:data])
end
|
12
|
+
|
13
|
+
# Converts a repository to JSON. Despite its name, +file+ is handed to
# repo_to_ruby as the repository; the :data part of the result is serialised
# via build_json.
def from_store(file,select_dataset=nil,shorten_url=true)
  cube = repo_to_ruby(file,select_dataset,shorten_url)
  build_json(cube[:data])
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
#version 2.4
|
2
|
+
Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID chromosome_name_WU start_WU stop_WU reference_WU variant_WU type_WU gene_name_WU transcript_name_WU transcript_species_WU transcript_source_WU transcript_version_WU strand_WU transcript_status_WU trv_type_WU c_position_WU amino_acid_change_WU ucsc_cons_WU domain_WU all_domains_WU deletion_substructures_WU transcript_error
|
3
|
+
A1BG 0 genome.wustl.edu 37 19 58862784 58862784 + Missense_Mutation SNP C C T novel TCGA-E9-A22B-01A-11D-A159-09 TCGA-E9-A22B-10A-01D-A159-09 C C Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx e46a5d19-2dd7-4c34-8fff-6276278c58b3 f948182a-f814-4e3c-83ee-82b78aa423c1 19 58862784 58862784 C T SNP A1BG NM_130786.3 human genbank 58_37c -1 reviewed missense c.883 p.A295T 0.915 HMMSmart_SM00409,superfamily_Immunoglobulin HMMSmart_SM00408,HMMSmart_SM00409,HMMPfam_ig,superfamily_Immunoglobulin - no_errors
|
4
|
+
A1BG 0 genome.wustl.edu 37 19 58864366 58864366 + Missense_Mutation SNP G G A rs151098196 byFrequency TCGA-E9-A1NH-01A-11D-A14G-09 TCGA-E9-A1NH-11A-33D-A14G-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx 13c312ec-0add-4758-ab8d-c193e2e08c6d 0ee95056-a7cc-415c-a487-3ad08604dfc0 19 58864366 58864366 G A SNP A1BG NM_130786.3 human genbank 58_37c -1 reviewed missense c.268 p.R90C 0.950 HMMSmart_SM00408,HMMSmart_SM00409,superfamily_Immunoglobulin HMMSmart_SM00408,HMMSmart_SM00409,HMMPfam_ig,superfamily_Immunoglobulin - no_errors
|
5
|
+
A1CF 29974 genome.wustl.edu 37 10 52595854 52595854 + Missense_Mutation SNP G G A novel TCGA-BH-A0HP-01A-12D-A099-09 TCGA-BH-A0HP-10A-01D-A099-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx ad52a8fb-7a76-4aa0-95fb-d6edab0fe2b2 8c059d33-23de-439a-914a-290527c5efbe 10 52595854 52595854 G A SNP A1CF NM_138932.1 human genbank 58_37c -1 reviewed missense c.584 p.A195V 1.000 HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_dsRNA-binding domain-like,superfamily_RNA-binding domain RBD - no_errors
|
6
|
+
A1CF 29974 genome.wustl.edu 37 10 52595937 52595937 + Silent SNP G G A novel TCGA-BH-A18P-01A-11D-A12B-09 TCGA-BH-A18P-11A-43D-A12B-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx add624a3-57e9-46be-9bcc-3e53d7c2dfb7 5cae8dca-b28a-4483-9c03-6f0645161c04 10 52595937 52595937 G A SNP A1CF NM_138932.1 human genbank 58_37c -1 reviewed silent c.501 p.I167 0.615 HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_dsRNA-binding domain-like,superfamily_RNA-binding domain RBD - no_errors
|
7
|
+
A2BP1 0 genome.wustl.edu 37 16 7568361 7568361 + Silent SNP G G C novel TCGA-D8-A1JN-01A-11D-A13L-09 TCGA-D8-A1JN-10A-01D-A13O-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx c83c7d48-8671-4f27-b3dd-05411fa2f784 c14cac2a-e308-44fa-b1af-ee51511ee0ee 16 7568361 7568361 G C SNP A2BP1 NM_145891.2 human genbank 58_37c +1 reviewed silent c.300 p.T100 0.995 NULL HMMPfam_RRM_1,HMMSmart_RRM,superfamily_SSF54928 - no_errors
|
8
|
+
A2BP1 54715 genome.wustl.edu 37 16 7102099 7102099 + Missense_Mutation SNP G G T novel TCGA-E2-A1BC-01A-11D-A14G-09 TCGA-E2-A1BC-10A-01D-A12Q-09 G G G T G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 5947a9db-7d13-44ff-86ad-eb5e6c8dcec5 6a4cd52f-2247-4caf-9b37-e90b02fd4d8b 16 7102099 7102099 G T SNP A2BP1 NM_001142334.1 human genbank 58_37c +1 reviewed missense c.27 p.R9S 1.000 NULL HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD - no_errors
|
9
|
+
A2BP1 54715 genome.wustl.edu 37 16 7383011 7383011 + Silent SNP G G A novel TCGA-AR-A1AJ-01A-21D-A12Q-09 TCGA-AR-A1AJ-10A-01D-A12Q-09 G G G A G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 4e1f9084-4729-4b3f-b036-6226d64fd25b 63ee3781-4578-4d19-88e4-c8785fc7987e 16 7383011 7383011 G A SNP A2BP1 NM_145891.2 human genbank 58_37c +1 reviewed silent c.9 p.A3 1.000 NULL HMMPfam_RRM_1,HMMSmart_RRM,superfamily_SSF54928 - no_errors
|
10
|
+
A2M 2 genome.wustl.edu 37 12 9251298 9251298 + Nonsense_Mutation SNP G G A novel TCGA-A8-A08G-01A-11W-A019-09 TCGA-A8-A08G-10A-01W-A021-09 G G G A G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 8da61928-e935-4a33-8e46-840e637163d7 74a3a4af-c93a-4fcd-af11-1f5eeb847c3c 12 9251298 9251298 G A SNP A2M NM_000014.4 human genbank 58_37c -1 reviewed nonsense c.1756 p.R586* 0.003 HMMPfam_A2M_N_2 HMMPfam_A2M,HMMPfam_A2M_N,superfamily_Terpenoid cyclases/Protein prenyltransferases,HMMPfam_A2M_recep,superfamily_Alpha-macroglobulin receptor domain,PatternScan_TONB_DEPENDENT_REC_1,HMMPfam_A2M_N_2,HMMPfam_A2M_comp,HMMPfam_Thiol-ester_cl,PatternScan_ALPHA_2_MACROGLOBULIN - no_errors
|