bio-publisci 0.0.8 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +10 -0
  3. data/Rakefile +1 -1
  4. data/bin/bio-publisci-server +50 -0
  5. data/features/reader_steps.rb +1 -1
  6. data/lib/bio-publisci.rb +11 -2
  7. data/lib/bio-publisci/datacube_model.rb +92 -88
  8. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +0 -1
  9. data/lib/bio-publisci/dataset/data_cube.rb +78 -44
  10. data/lib/bio-publisci/dataset/dataset_for.rb +26 -27
  11. data/lib/bio-publisci/metadata/metadata_model.rb +21 -23
  12. data/lib/bio-publisci/metadata/prov/model/prov_models.rb +5 -5
  13. data/lib/bio-publisci/output.rb +1 -1
  14. data/lib/bio-publisci/parser.rb +130 -12
  15. data/lib/bio-publisci/post_processor.rb +95 -0
  16. data/lib/bio-publisci/query/query_helper.rb +13 -8
  17. data/lib/bio-publisci/readers/arff.rb +1 -1
  18. data/lib/bio-publisci/readers/base.rb +57 -0
  19. data/lib/bio-publisci/readers/csv.rb +2 -5
  20. data/lib/bio-publisci/readers/dataframe.rb +2 -2
  21. data/lib/bio-publisci/readers/maf.rb +199 -0
  22. data/lib/bio-publisci/readers/r_cross.rb +6 -10
  23. data/lib/bio-publisci/readers/r_matrix.rb +1 -1
  24. data/lib/bio-publisci/writers/base.rb +16 -0
  25. data/lib/bio-publisci/writers/json.rb +18 -0
  26. data/resources/maf_example.maf +10 -0
  27. data/resources/maf_rdf.ttl +1173 -0
  28. data/resources/primer.ttl +38 -0
  29. data/resources/queries/gene.rq +16 -0
  30. data/resources/queries/hugo_to_ensembl.rq +7 -0
  31. data/resources/queries/maf_column.rq +26 -0
  32. data/resources/queries/patient.rq +11 -0
  33. data/resources/queries/patient_list.rq +11 -0
  34. data/resources/queries/patients_with_mutation.rq +18 -0
  35. data/scripts/get_gene_lengths.rb +50 -0
  36. data/scripts/islet_mlratio.rb +1 -1
  37. data/scripts/scan_islet.rb +1 -1
  38. data/scripts/update_reference.rb +8 -3
  39. data/server/helpers.rb +215 -0
  40. data/server/public/src-min-noconflict/LICENSE +24 -0
  41. data/server/public/src-min-noconflict/ace.js +11 -0
  42. data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
  43. data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
  44. data/server/public/src-min-noconflict/ext-emmet.js +1 -0
  45. data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
  46. data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
  47. data/server/public/src-min-noconflict/ext-modelist.js +1 -0
  48. data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
  49. data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
  50. data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
  51. data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
  52. data/server/public/src-min-noconflict/ext-split.js +1 -0
  53. data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
  54. data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
  55. data/server/public/src-min-noconflict/ext-textarea.js +1 -0
  56. data/server/public/src-min-noconflict/ext-themelist.js +1 -0
  57. data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
  58. data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
  59. data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
  60. data/server/public/src-min-noconflict/mode-ruby.js +1 -0
  61. data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
  62. data/server/public/src-min-noconflict/theme-twilight.js +1 -0
  63. data/server/public/src-min-noconflict/worker-coffee.js +1 -0
  64. data/server/public/src-min-noconflict/worker-css.js +1 -0
  65. data/server/public/src-min-noconflict/worker-javascript.js +1 -0
  66. data/server/public/src-min-noconflict/worker-json.js +1 -0
  67. data/server/public/src-min-noconflict/worker-lua.js +1 -0
  68. data/server/public/src-min-noconflict/worker-php.js +1 -0
  69. data/server/public/src-min-noconflict/worker-xquery.js +1 -0
  70. data/server/routes.rb +123 -0
  71. data/server/views/dsl.haml +65 -0
  72. data/server/views/dump.haml +3 -0
  73. data/server/views/import.haml +35 -0
  74. data/server/views/new_repository.haml +25 -0
  75. data/server/views/query.haml +28 -0
  76. data/server/views/repository.haml +25 -0
  77. data/spec/ORM/data_cube_orm_spec.rb +1 -0
  78. data/spec/bnode_spec.rb +66 -0
  79. data/spec/data_cube_spec.rb +66 -63
  80. data/spec/dataset_for_spec.rb +36 -16
  81. data/spec/dsl_spec.rb +41 -0
  82. data/spec/generators/csv_spec.rb +3 -3
  83. data/spec/generators/dataframe_spec.rb +2 -2
  84. data/spec/generators/maf_spec.rb +40 -0
  85. data/spec/generators/r_cross_spec.rb +2 -2
  86. data/spec/generators/r_matrix_spec.rb +2 -2
  87. data/spec/length_lookup_spec.rb +0 -0
  88. data/spec/maf_query_spec.rb +343 -0
  89. data/spec/resource/example.Rhistory +1 -1
  90. data/spec/turtle/bacon +9 -9
  91. data/spec/turtle/reference +43 -43
  92. data/spec/turtle/weather +10 -10
  93. data/spec/writer_spec.rb +16 -2
  94. metadata +212 -61
@@ -37,10 +37,10 @@ module PubliSci
37
37
  # end
38
38
 
39
39
  def execute(string,store,type=:fourstore)
40
- if type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
41
- sparql = SPARQL::Client.new(store)
42
- elsif store.is_a? PubliSci::Store
40
+ if store.is_a?(PubliSci::Store) || store.is_a?(RDF::FourStore)
43
41
  sparql = SPARQL::Client.new(store.url+"/sparql/")
42
+ elsif type == :graph || store.is_a?(RDF::Graph) || store.is_a?(RDF::Repository)
43
+ sparql = SPARQL::Client.new(store)
44
44
  elsif type == :fourstore
45
45
  sparql = SPARQL::Client.new(store+"/sparql/")
46
46
  end
@@ -48,12 +48,17 @@ module PubliSci
48
48
  end
49
49
 
50
50
  def execute_from_file(file,store,type=:fourstore,substitutions={})
51
+ if Gem::Dependency.new('bio-publisci').matching_specs.size > 0
52
+ queries_dir = Gem::Specification.find_by_name("bio-publisci").gem_dir + "/resources/queries/"
53
+ else
54
+ queries_dir = File.dirname(__FILE__) + '/../../../resources/queries/'
55
+ end
51
56
  if File.exist?(file)
52
57
  string = IO.read(file)
53
- elsif File.exist?(File.dirname(__FILE__) + '/../../../resources/queries/' + file)
54
- string = IO.read(File.dirname(__FILE__) + '/../../../resources/queries/' + file)
55
- elsif File.exist?(File.dirname(__FILE__) + '/../../../resources/queries/' + file + '.rq')
56
- string = IO.read(File.dirname(__FILE__) + '/../../../resources/queries/' + file + '.rq')
58
+ elsif File.exist?(queries_dir + file)
59
+ string = IO.read(queries_dir + file)
60
+ elsif File.exist?(queries_dir + file + '.rq')
61
+ string = IO.read(queries_dir + file + '.rq')
57
62
  else
58
63
  raise "couldn't find query for #{file}"
59
64
  end
@@ -113,6 +118,6 @@ SELECT DISTINCT ?label WHERE {
113
118
  end
114
119
 
115
120
  class QueryHelper
116
- include PubliSci::Query
121
+ extend PubliSci::Query
117
122
  end
118
123
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Readers
3
3
  class ARFF
4
4
  include PubliSci::Dataset::DataCube
5
5
 
@@ -0,0 +1,57 @@
1
+ module PubliSci
2
+ module Readers
3
+ class Base
4
+ include PubliSci::Query
5
+ include PubliSci::Parser
6
+ include PubliSci::Analyzer
7
+ include PubliSci::Interactive
8
+ include PubliSci::Dataset::DataCube
9
+
10
# Default entry point; should be overridden if extra processing/input is
# required.
#
# The first argument is forwarded to generate_n3 unchanged. The arguments
# between the first and the last are read as a flat key, value, key, value...
# list and turned into the options hash. The final argument is not forwarded
# (NOTE(review): presumably a trailing flag such as "interactive" - confirm
# against callers).
def automatic(*args)
  input = args[0]
  flat_pairs = args[1..-2]
  generate_n3(input, Hash[*flat_pairs])
end
14
+
15
# Placeholder implementation: always fails, naming the receiver that was
# asked to generate output without providing its own generate_n3.
#
# @raise [RuntimeError] on every call
def generate_n3(*args)
  message = "#{self} does not implement a generate_n3 method!"
  raise message
end
18
+
19
# Builds a two-entry predicate/object list describing a typed value:
# an "a" (type) pair followed by a SIO_000300 pair carrying the value.
# (SIO_000300 is presumably SIO's "has value" property - confirm.)
#
# @param type  the object placed after "a"
# @param value the object placed after the SIO_000300 predicate
# @return [Array<Array>] [["a", type], [SIO_000300, value]]
def sio_value(type, value)
  type_pair  = ["a", type]
  value_pair = ["http://semanticscience.org/resource/SIO_000300", value]
  [type_pair, value_pair]
end
25
+
26
# Builds a nested array describing an attribute node that carries a value.
#
# The value is wrapped as [SIO_000300, value]; when data_type is given that
# pair is preceded by ["a", data_type]. The result is then hung off the
# SIO_000008 predicate, and when attribute_type is given the whole node is
# preceded by ["a", attribute_type].
# (SIO_000008 / SIO_000300 are presumably SIO's "has attribute" / "has value"
# properties - confirm.)
#
# @param attribute_type type of the attribute node, or nil/false for untyped
# @param value          the value stored on the attribute
# @param data_type      optional type of the value node
# @return [Array] the nested predicate/object structure
def sio_attribute(attribute_type, value, data_type=nil)
  value_node = ["http://semanticscience.org/resource/SIO_000300", value]
  value_node = [["a", data_type], value_node] if data_type

  attribute_node = ["http://semanticscience.org/resource/SIO_000008", value_node]
  return attribute_node unless attribute_type

  [["a", attribute_type], attribute_node]
end
47
+
48
# Returns the next value of a per-object counter kept in @__current_label:
# 0 on the first call, then 1, 2, ... on later calls.
#
# @return [Integer] the freshly assigned counter value
def next_label
  @__current_label = @__current_label ? @__current_label + 1 : 0
end
55
+ end
56
+ end
57
+ end
@@ -1,9 +1,6 @@
1
1
  module PubliSci
2
- module Reader
3
- class CSV
4
- include PubliSci::Dataset::DataCube
5
- include PubliSci::Interactive
6
-
2
+ module Readers
3
+ class CSV < Base
7
4
  def automatic(file=nil,dataset_name=nil,options={},interactive=true)
8
5
  #to do
9
6
  # puts "f #{file} \n ds #{dataset_name} opts #{options}"
@@ -1,8 +1,8 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Readers
3
3
  class Dataframe
4
4
  include PubliSci::Dataset::DataCube
5
- include PubliSci::Reader::Output
5
+ include PubliSci::Readers::Output
6
6
 
7
7
  # def initialize(var)
8
8
  # @var = var
@@ -0,0 +1,199 @@
1
+ module PubliSci
2
+ module Readers
3
+ class MAF < Base
4
# Column order of a parsed MAF row. The first 34 names are read from the
# input file; patient_id and sample_id are appended by the reader from the
# tumor sample barcode.
COLUMN_NAMES = %w{
  Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position
  End_Position Strand Variant_Classification Variant_Type Reference_Allele
  Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status
  Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1
  Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2
  Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2
  Verification_Status Validation_Status Mutation_Status Sequencing_Phase
  Sequence_Source Validation_Method Score BAM_File Sequencer
  Tumor_Sample_UUID Matched_Norm_Sample_UUID patient_id sample_id
}

# Default range (datatype) per component. Keys must match COLUMN_NAMES
# exactly; the former "Start_position" key could never match the
# "Start_Position" column.
# NOTE(review): "Chromosome" is typed xsd:int although chromosome names such
# as X or Y are not integers - confirm this is intended.
COMPONENT_RANGES = {
  "Tumor_Sample_Barcode" => "xsd:string",
  "Start_Position"       => "xsd:int",
  "Center"               => "xsd:string",
  "NCBI_Build"           => "xsd:int",
  "Chromosome"           => "xsd:int"
}

# Allowed values for each coded column.
TCGA_CODES =
  {
    # "IGR" was previously listed as "IGR1", which looks like a footnote
    # marker pasted in with the value - confirm against the MAF specification.
    "Variant_Classification" => %w{Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins Missense_Mutation Nonsense_Mutation Silent Splice_Site Translation_Start_Site Nonstop_Mutation 3'UTR 3'Flank 5'UTR 5'Flank IGR Intron RNA Targeted_Region},
    "Variant_Type" => %w{SNP DNP TNP ONP INS DEL Consolidated},
    "dbSNP_Val_Status" => %w{by1000genomes by2Hit2Allele byCluster byFrequency byHapMap byOtherPop bySubmitter alternate_allele},
    # %w takes no commas: the old literal produced the code "Verified," .
    "Verification_Status" => %w{Verified Unknown},
    "Validation_Status" => %w{Untested Inconclusive Valid Invalid},
    # A plain array is required here because one code contains a space.
    "Mutation_Status" => ["None", "Germline", "Somatic", "LOH", "Post-transcriptional modification", "Unknown"],
    "Sequence_Source" => %w{WGS WGA WXS RNA-Seq miRNA-Seq Bisulfite-Seq VALIDATION Other ncRNA-Seq WCS CLONE POOLCLONE AMPLICON CLONEEND FINISHING ChIP-Seq MNase-Seq DNase-Hypersensitivity EST FL-cDNA CTS MRE-Seq MeDIP-Seq MBD-Seq Tn-Seq FAIRE-seq SELEX RIP-Seq ChIA-PET},
    "Sequencer" => ["Illumina GAIIx", "Illumina HiSeq", "SOLID", "454", "ABI 3730xl", "Ion Torrent PGM", "Ion Torrent Proton", "PacBio RS", "Illumina MiSeq", "Illumina HiSeq 2500", "454 GS FLX Titanium", "AB SOLiD 4 System"]
  }
19
+
20
# Converts a MAF file into turtle: the structure section followed by one
# observation per data line.
#
# Recognised options (all optional):
#   :dataset_name    - name of the data set; defaults to the input file's
#                      basename without extension
#   :output          - :print returns the turtle as a String; anything else
#                      (default :file) writes "<base>.ttl"
#   :output_base     - path prefix of the output file; defaults to the
#                      data set name
#   :no_labels       - defaults to true; an explicit false is now honoured
#   :lookup_hugo     - when truthy the written file is passed to post_process
#   :complex_objects - forwarded to process_line
#   :ranges          - defaults to COMPONENT_RANGES
# Defaults are written back into the given options hash, as before.
#
# @param input_file [String] path of the MAF file to read
# @param options [Hash] see above
# @return [String] the turtle text when output is :print
# @return [File, Object] otherwise the still-open (flushed) output file, or
#   the result of post_process when :lookup_hugo is set
def generate_n3(input_file, options={})
  output      = options[:output] || :file
  output_base = options[:output_base]

  @dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}
  # @codes = %w{Variant_Classification Variant_Type}
  @codes = @dimensions
  @measures = (COLUMN_NAMES - @dimensions - @codes)
  # Recomputed on every call so a reader reused for a second file does not
  # keep the first file's name; the :dataset_name option used to be ignored.
  @dataset_name = options[:dataset_name] || File.basename(input_file, '.*')
  @barcode_index = COLUMN_NAMES.index('Tumor_Sample_Barcode')

  # `||= true` would overwrite an explicit false, so only fill in when unset.
  options[:no_labels] = true if options[:no_labels].nil?
  options[:lookup_hugo] ||= false
  options[:complex_objects] ||= false
  options[:ranges] ||= COMPONENT_RANGES

  if output == :print
    append_observations(input_file, structure(options), options)
  else
    # TODO - allow multi file / separate structure output for very large datasets
    file_base = output_base || @dataset_name

    # File.open instead of Kernel#open: the latter runs a command when the
    # path starts with "|".
    out = File.open("#{file_base}.ttl", 'w')
    out.write(structure(options))
    append_observations(input_file, out, options)
    # The handle is returned open (callers may rely on that), so make sure
    # everything written so far is on disk before anyone re-reads the file.
    out.flush

    options[:lookup_hugo] ? post_process(out) : out
  end
end

# Feeds every line of input_file through process_line and appends each
# produced observation to sink (a String or an IO). The line index, counting
# skipped lines too, is used as the observation label. Returns sink.
def append_observations(input_file, sink, options)
  File.open(input_file) do |maf|
    maf.each_line.with_index do |line, n|
      processed = process_line(line, n.to_s, options)
      sink << processed.first if processed
    end
  end
  sink
end
private :append_observations
70
+
71
# Turns one line of a MAF file into observation output.
#
# Comment lines (starting with "#"), the header line (starting with "Hugo")
# and blank lines produce nil. For data lines the tab-separated fields are
# trimmed/padded to the columns read from the file, the two barcode-derived
# columns are appended, identifiers are rewritten to identifiers.org URIs,
# and the row is handed to observations.
#
# @param line [String] raw line from the MAF file
# @param label [String] label used for the observation
# @param options [Hash] reader options; :complex_objects switches on SIO nodes
# @return [Object, nil] whatever observations returns, or nil for skipped lines
def process_line(line, label, options)
  return if line.strip.empty?
  return if line[0] == "#" || line[0..3] == "Hugo"

  # The last two COLUMN_NAMES are derived from the barcode, not read.
  file_columns = COLUMN_NAMES.length - 2

  # Keyword form of the options: a positional hash is rejected on Ruby 3.
  entry = ::CSV.parse(line, col_sep: "\t").flatten[0...file_columns]
  entry.fill(nil, entry.length...file_columns)

  # Short rows have no barcode; keep the row instead of failing on nil.
  barcode = entry[@barcode_index]
  entry.concat(barcode ? parse_barcode(barcode) : [nil, nil])

  entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]

  # A 0 in the entrez-id column appears to mean null
  entry[1] = nil if entry[1] == '0'
  entry[1] = "http://identifiers.org/ncbigene/#{entry[1]}" if entry[1]

  # Only link non-novel dbSNP entries
  snp = COLUMN_NAMES.index('dbSNP_RS')
  if entry[snp] && entry[snp][0..1] == "rs"
    entry[snp] = "http://identifiers.org/dbsnp/#{entry[snp].gsub('rs','')}"
  end

  # optionally create typed objects using sio nodes
  entry = sio_values(entry) if options[:complex_objects]

  # observations expects every column as a list of values; a row has one.
  data = COLUMN_NAMES.each_with_index.each_with_object({}) do |(name, i), row|
    row[name] = [entry[i]]
  end

  observations(@measures, @dimensions, @codes, data, [label], @dataset_name, options)
end
103
+
104
# Replaces selected columns of a parsed MAF row, in place, with typed nodes
# built by sio_value / sio_attribute.
#
# The first two columns are wrapped only when present; every other listed
# column is wrapped unconditionally, so a nil cell still becomes a node.
#
# @param entry [Array] row values ordered like COLUMN_NAMES
# @return [Array] the same array, modified
def sio_values(entry)
  entry[0] = sio_value('http://edamontology.org/data_1791', entry[0]) if entry[0]

  # Link entrez genes
  entry[1] = sio_value("http://identifiers.org/ncbigene", entry[1]) if entry[1]

  # Columns wrapped as typed values: column name => type.
  typed_values = {
    'dbSNP_RS'   => "http://identifiers.org/dbsnp",
    # test SIO attributes for chromosome
    'Chromosome' => "http://purl.org/obo/owl/SO#SO_0000340"
  }
  # More SIO attributes for alleles
  %w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each do |allele|
    typed_values[allele] = "http://purl.org/obo/owl/SO#SO_0001023"
  end
  typed_values.each do |name, type|
    position = COLUMN_NAMES.index(name)
    entry[position] = sio_value(type, entry[position])
  end

  # Columns wrapped as attributes: [column name, attribute type, data type].
  # Start/end positions use faldo for locations.
  typed_attributes = [
    ["Strand", "http://edamontology.org/data_0853"],
    ["Center", "foaf:homepage"],
    ["Start_Position", "http://biohackathon.org/resource/faldo#begin", "http://biohackathon.org/resource/faldo#Position"],
    ["End_Position", "http://biohackathon.org/resource/faldo#end", "http://biohackathon.org/resource/faldo#Position"]
  ]
  typed_attributes.each do |name, attribute_type, data_type|
    position = COLUMN_NAMES.index(name)
    entry[position] =
      if data_type
        sio_attribute(attribute_type, entry[position], data_type)
      else
        sio_attribute(attribute_type, entry[position])
      end
  end

  entry
end
145
+
146
# Rewrites one column of a row in place.
#
# With a value the cell becomes prefix + value. Without one, prefix is
# appended to the cell's current content.
#
# @param entry [Array] row values ordered like COLUMN_NAMES
# @param column [String] name of the column to change
# @param prefix [String] text placed before value, or appended to the cell
# @param value [String, nil] replacement value
# @return the new content of the cell
def column_replace(entry, column, prefix, value=nil)
  position = COLUMN_NAMES.index(column)
  entry[position] = value ? prefix + value : entry[position] + prefix
end
153
+
154
# Asks the bio2rdf HGNC SPARQL endpoint for the approved symbol of a gene
# that is known under hugo_symbol, either as its approved symbol or as a
# synonym.
#
# The symbol is escaped before being placed in the query so that quotes or
# backslashes in it cannot end the string literal and alter the query.
#
# @param hugo_symbol [#to_s] gene symbol to look up
# @return [String] the first approved symbol found, or "" when there is none
def official_symbol(hugo_symbol)
  escapes = { "\\" => "\\\\", '"' => '\\"', "\n" => "\\n", "\r" => "\\r" }
  literal = hugo_symbol.to_s.gsub(/[\\"\n\r]/) { |char| escapes[char] }

  qry = <<-EOF

  SELECT distinct ?official where {
    {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{literal}"}
    UNION
    {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{literal}"}

    ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
  }

  EOF

  sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
  sparql.query(qry).map(&:official).first.to_s
end
170
+
171
# Splits a sample barcode such as TCGA-E9-A22B-01A-11D-A159-09 by fixed
# character positions: characters 5..11 ("E9-A22B") and everything from
# character 13 on ("01A-11D-A159-09"). The reader stores the two pieces in
# the trailing patient_id and sample_id columns.
#
# A missing (nil) or too-short barcode yields nil for the pieces that cannot
# be cut out, instead of raising.
#
# @param code [String, nil] the barcode
# @return [Array(String, String)] the two pieces, each possibly nil
def parse_barcode(code)
  barcode = code.to_s
  [barcode[5..11], barcode[13..-1]]
end
175
+
176
# Assembles the part of the output that precedes the observations by
# concatenating, in this order: prefixes, data structure definition, dataset,
# component specifications, measure properties, dimension properties, code
# lists and concept codes. Uses the measures, dimensions, codes and dataset
# name stored on the reader, and TCGA_CODES as the code table.
#
# @param options [Hash] passed through to every helper
# @return [String] the combined text (the string returned by prefixes,
#   extended in place)
def structure(options={})
  turtle = prefixes(@dataset_name, options)
  turtle << data_structure_definition(@measures, @dimensions, @codes, @dataset_name, options)
  turtle << dataset(@dataset_name, options)

  component_specifications(@measures, @dimensions, @codes, @dataset_name, options).each do |specification|
    turtle << specification
  end
  measure_properties(@measures, @dataset_name, options).each do |property|
    turtle << property
  end
  dimension_properties(@dimensions, @codes, @dataset_name, options).each do |property|
    turtle << property
  end
  code_lists(@codes, TCGA_CODES, @dataset_name, options).each do |list|
    turtle << list
  end
  concept_codes(@codes, TCGA_CODES, @dataset_name, options).each do |concept|
    turtle << concept
  end

  turtle
end
188
+
189
# Rewrites every hgnc.symbol URI in file so that it carries the approved
# symbol returned by official_symbol.
#
# Lookups are cached per class, so each distinct symbol is looked up once.
# When the lookup finds nothing (empty result) the original symbol is kept;
# previously the symbol would have been replaced by an empty string.
#
# @param file the generated output, handed to PubliSci::PostProcessor.process
#   as both input and output
# @return whatever PubliSci::PostProcessor.process returns
def post_process(file)
  reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}

  # Class-level instance variable instead of a @@class variable, which would
  # be shared with every class in the inheritance tree.
  cache = self.class.instance_variable_get(:@hugo_cache) ||
          self.class.instance_variable_set(:@hugo_cache, {})

  PubliSci::PostProcessor.process(file, file, reg) { |g|
    # The block previously read an undefined local `cache` and raised
    # NameError on the first match.
    cache[g] ||= begin
      official = official_symbol(g)
      official.empty? ? g : official
    end
    'http://identifiers.org/hgnc.symbol/' + cache[g]
  }
end
197
+ end
198
+ end
199
+ end
@@ -1,8 +1,8 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Readers
3
3
  class RCross
4
4
  include PubliSci::Dataset::DataCube
5
- include PubliSci::Reader::Output
5
+ include PubliSci::Readers::Output
6
6
 
7
7
  def generate_n3(client, var, outfile_base, options={})
8
8
  meas = measures(client,var,options)
@@ -75,33 +75,29 @@ module PubliSci
75
75
 
76
76
  def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
77
77
  data = {}
78
- # geno_chr = client.eval("#{var}$geno$'#{chr}'")
79
- # n_individuals = client.eval("#{var}$pheno[[1]]").to_ruby.size
80
- # entries_per_individual = @rexp.payload["geno"].payload[row_individ].payload["map"].payload.size * @rexp.payload["geno"].payload.names.size
78
+
81
79
  data["chr"] = []
82
80
  data["genotype"] = []
83
81
  data["individual"] = []
84
82
  data["marker"] = []
85
83
  data["markerpos"] = []
84
+
86
85
  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
87
86
  pheno_names.map{|name|
88
87
  data[name] = []
89
88
  }
90
- # n_individuals.times{|row_individ|
91
- # puts "#{row_individ}/#{n_individuals}"
92
89
  data["individual"] << (1..entries_per_individual).to_a.fill(row_individ)
93
90
 
94
91
  pheno_names.map{|name|
95
92
  data[name] << (1..entries_per_individual).to_a.fill(client.eval("#{var}$pheno$#{name}").to_ruby[row_individ])
96
93
  }
97
- # @rexp.payload["geno"].payload.names.map { |chr|
94
+
98
95
  num_markers = geno_chr.payload.first.to_ruby.column_size
99
96
  data["chr"] << (1..num_markers).to_a.fill(chr)
100
97
  data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
101
98
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
102
99
  data["markerpos"] << geno_chr.payload["map"].to_a
103
- # }
104
- # }
100
+
105
101
  data.map{|k,v| v.flatten!}
106
102
  data
107
103
  end
@@ -1,5 +1,5 @@
1
1
  module PubliSci
2
- module Reader
2
+ module Readers
3
3
  class RMatrix
4
4
  include PubliSci::Dataset::DataCube
5
5
 
@@ -63,6 +63,7 @@ module PubliSci
63
63
 
64
64
  def codes(input, data_set = nil, select = :label)
65
65
  repo = handle_input(input)
66
+
66
67
  if data_set
67
68
  codes = execute_from_file("codes.rq",repo,:graph,{"?dataSet"=>"<#{data_set}>"}).to_h
68
69
  else
@@ -72,6 +73,21 @@ module PubliSci
72
73
  (h[el.first]||=[]) << el.last; h
73
74
  }
74
75
  end
76
+
77
# Loads a turtle file into an RDF::Repository and converts it with
# repo_to_ruby.
#
# @param turtle_file path handed to RDF::Repository.load
# @param select_dataset forwarded to repo_to_ruby
# @param shorten_url forwarded to repo_to_ruby
# @return the result of repo_to_ruby
def turtle_to_ruby(turtle_file, select_dataset=nil, shorten_url=true)
  repo_to_ruby(RDF::Repository.load(turtle_file), select_dataset, shorten_url)
end
82
+
83
# Collects the dimensions, measures, codes and observations of one data set
# in repo into a plain Ruby hash.
#
# When no data set is given, the one returned by dataSet(repo, :dataset) is
# used.
#
# @param repo the repository queried by the helper methods
# @param select_dataset data set to read, or nil/false to look it up
# @param shorten_url forwarded to observations
# @return [Hash] with keys :measures, :dimensions, :coded_dimensions, :data
def repo_to_ruby(repo, select_dataset=nil, shorten_url=true)
  select_dataset ||= dataSet(repo, :dataset)

  # Locals are named differently from the helper methods they call so the
  # method calls are not mistaken for variable reads.
  dimension_list = dimensions(repo, select_dataset)
  measure_list   = measures(repo, select_dataset)
  code_table     = codes(repo, select_dataset)
  observed       = observations(repo, select_dataset, shorten_url)

  {
    measures: measure_list,
    dimensions: dimension_list,
    coded_dimensions: code_table,
    data: observed
  }
end
75
91
  end
76
92
  end
77
93
  end
@@ -0,0 +1,18 @@
1
+ module PubliSci
2
+ module Writers
3
+ class JSON < Base
4
# Serialises the values of data (its keys are dropped) as a JSON array.
#
# @param data [#values] typically a Hash
# @return [String] JSON text
def build_json(data)
  rows = data.values
  rows.to_json
end
7
+
8
# Converts a turtle file to JSON: the file is read with turtle_to_ruby and
# the :data part of the result is passed to build_json.
#
# @param file turtle file handed to turtle_to_ruby
# @param select_dataset forwarded to turtle_to_ruby
# @param shorten_url forwarded to turtle_to_ruby
# @return the result of build_json
def from_turtle(file, select_dataset=nil, shorten_url=true)
  build_json(turtle_to_ruby(file, select_dataset, shorten_url)[:data])
end
12
+
13
# Converts the contents of a store to JSON: the first argument is handed to
# repo_to_ruby as the repository, and the :data part of the result is passed
# to build_json.
#
# @param file the repository/store to read (named "file" for historical
#   reasons; it is passed to repo_to_ruby as its repo argument)
# @param select_dataset forwarded to repo_to_ruby
# @param shorten_url forwarded to repo_to_ruby
# @return the result of build_json
def from_store(file, select_dataset=nil, shorten_url=true)
  converted = repo_to_ruby(file, select_dataset, shorten_url)
  build_json(converted[:data])
end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
+ #version 2.4
2
+ Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID chromosome_name_WU start_WU stop_WU reference_WU variant_WU type_WU gene_name_WU transcript_name_WU transcript_species_WU transcript_source_WU transcript_version_WU strand_WU transcript_status_WU trv_type_WU c_position_WU amino_acid_change_WU ucsc_cons_WU domain_WU all_domains_WU deletion_substructures_WU transcript_error
3
+ A1BG 0 genome.wustl.edu 37 19 58862784 58862784 + Missense_Mutation SNP C C T novel TCGA-E9-A22B-01A-11D-A159-09 TCGA-E9-A22B-10A-01D-A159-09 C C Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx e46a5d19-2dd7-4c34-8fff-6276278c58b3 f948182a-f814-4e3c-83ee-82b78aa423c1 19 58862784 58862784 C T SNP A1BG NM_130786.3 human genbank 58_37c -1 reviewed missense c.883 p.A295T 0.915 HMMSmart_SM00409,superfamily_Immunoglobulin HMMSmart_SM00408,HMMSmart_SM00409,HMMPfam_ig,superfamily_Immunoglobulin - no_errors
4
+ A1BG 0 genome.wustl.edu 37 19 58864366 58864366 + Missense_Mutation SNP G G A rs151098196 byFrequency TCGA-E9-A1NH-01A-11D-A14G-09 TCGA-E9-A1NH-11A-33D-A14G-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx 13c312ec-0add-4758-ab8d-c193e2e08c6d 0ee95056-a7cc-415c-a487-3ad08604dfc0 19 58864366 58864366 G A SNP A1BG NM_130786.3 human genbank 58_37c -1 reviewed missense c.268 p.R90C 0.950 HMMSmart_SM00408,HMMSmart_SM00409,superfamily_Immunoglobulin HMMSmart_SM00408,HMMSmart_SM00409,HMMPfam_ig,superfamily_Immunoglobulin - no_errors
5
+ A1CF 29974 genome.wustl.edu 37 10 52595854 52595854 + Missense_Mutation SNP G G A novel TCGA-BH-A0HP-01A-12D-A099-09 TCGA-BH-A0HP-10A-01D-A099-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx ad52a8fb-7a76-4aa0-95fb-d6edab0fe2b2 8c059d33-23de-439a-914a-290527c5efbe 10 52595854 52595854 G A SNP A1CF NM_138932.1 human genbank 58_37c -1 reviewed missense c.584 p.A195V 1.000 HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_dsRNA-binding domain-like,superfamily_RNA-binding domain RBD - no_errors
6
+ A1CF 29974 genome.wustl.edu 37 10 52595937 52595937 + Silent SNP G G A novel TCGA-BH-A18P-01A-11D-A12B-09 TCGA-BH-A18P-11A-43D-A12B-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx add624a3-57e9-46be-9bcc-3e53d7c2dfb7 5cae8dca-b28a-4483-9c03-6f0645161c04 10 52595937 52595937 G A SNP A1CF NM_138932.1 human genbank 58_37c -1 reviewed silent c.501 p.I167 0.615 HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_dsRNA-binding domain-like,superfamily_RNA-binding domain RBD - no_errors
7
+ A2BP1 0 genome.wustl.edu 37 16 7568361 7568361 + Silent SNP G G C novel TCGA-D8-A1JN-01A-11D-A13L-09 TCGA-D8-A1JN-10A-01D-A13O-09 G G Unknown Untested Somatic Phase_IV WXS none 1 dbGAP Illumina GAIIx c83c7d48-8671-4f27-b3dd-05411fa2f784 c14cac2a-e308-44fa-b1af-ee51511ee0ee 16 7568361 7568361 G C SNP A2BP1 NM_145891.2 human genbank 58_37c +1 reviewed silent c.300 p.T100 0.995 NULL HMMPfam_RRM_1,HMMSmart_RRM,superfamily_SSF54928 - no_errors
8
+ A2BP1 54715 genome.wustl.edu 37 16 7102099 7102099 + Missense_Mutation SNP G G T novel TCGA-E2-A1BC-01A-11D-A14G-09 TCGA-E2-A1BC-10A-01D-A12Q-09 G G G T G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 5947a9db-7d13-44ff-86ad-eb5e6c8dcec5 6a4cd52f-2247-4caf-9b37-e90b02fd4d8b 16 7102099 7102099 G T SNP A2BP1 NM_001142334.1 human genbank 58_37c +1 reviewed missense c.27 p.R9S 1.000 NULL HMMPfam_RRM_1,HMMSmart_SM00360,superfamily_RNA-binding domain RBD - no_errors
9
+ A2BP1 54715 genome.wustl.edu 37 16 7383011 7383011 + Silent SNP G G A novel TCGA-AR-A1AJ-01A-21D-A12Q-09 TCGA-AR-A1AJ-10A-01D-A12Q-09 G G G A G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 4e1f9084-4729-4b3f-b036-6226d64fd25b 63ee3781-4578-4d19-88e4-c8785fc7987e 16 7383011 7383011 G A SNP A2BP1 NM_145891.2 human genbank 58_37c +1 reviewed silent c.9 p.A3 1.000 NULL HMMPfam_RRM_1,HMMSmart_RRM,superfamily_SSF54928 - no_errors
10
+ A2M 2 genome.wustl.edu 37 12 9251298 9251298 + Nonsense_Mutation SNP G G A novel TCGA-A8-A08G-01A-11W-A019-09 TCGA-A8-A08G-10A-01W-A021-09 G G G A G G Unknown Valid Somatic Phase_IV WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx 8da61928-e935-4a33-8e46-840e637163d7 74a3a4af-c93a-4fcd-af11-1f5eeb847c3c 12 9251298 9251298 G A SNP A2M NM_000014.4 human genbank 58_37c -1 reviewed nonsense c.1756 p.R586* 0.003 HMMPfam_A2M_N_2 HMMPfam_A2M,HMMPfam_A2M_N,superfamily_Terpenoid cyclases/Protein prenyltransferases,HMMPfam_A2M_recep,superfamily_Alpha-macroglobulin receptor domain,PatternScan_TONB_DEPENDENT_REC_1,HMMPfam_A2M_N_2,HMMPfam_A2M_comp,HMMPfam_Thiol-ester_cl,PatternScan_ALPHA_2_MACROGLOBULIN - no_errors