rbbt-phgx 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,12 +5,13 @@ require 'digest/md5'
5
5
  module MutationAssessor
6
6
 
7
7
  class NotDone < StandardError; end
8
- URL="http://mutationassessor.org/"
8
+ URL="http://mutationassessor.org"
9
9
  ASTERISK = "*"[0]
10
10
 
11
11
  # mutations is a hash of genes in Uniprot protein accession pointing to lists
12
12
  # of aminoacid substitutions
13
13
  def self.predict(mutations)
14
+ return TSV.setup({}, :header_hash => "", :type => :list) if mutations.empty? or mutations.nil?
14
15
  vars = mutations.collect{|gene, list|
15
16
  list = [list] unless Array === list
16
17
  list.collect do |mut|
@@ -36,12 +37,11 @@ module MutationAssessor
36
37
  doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => nocache))
37
38
  end
38
39
 
39
- textareas = doc.css('textarea')
40
+ textareas = doc.css('p')
40
41
 
41
42
  if textareas.empty?
42
- puts "No text area"
43
- puts doc
44
- puts
43
+ Log.debug "No text area"
44
+ Log.debug doc.to_s
45
45
  raise NotDone, "No text aread found in response HTML"
46
46
  end
47
47
 
@@ -70,11 +70,11 @@ module MutationAssessor
70
70
  end
71
71
  end
72
72
 
73
- if result.empty?
73
+ if result.empty? and mutations.any?
74
74
  tmp = TmpFile.tmp_file
75
75
  html = tmp + ".html"
76
76
  variants = tmp + ".list"
77
- Open.write(tmp, doc.content)
77
+ Open.write(html, doc.content)
78
78
  Open.write(variants, post_data )
79
79
  raise "Result empty. Possible error. html in #{ html }, variants in #{variants}"
80
80
  end
@@ -82,22 +82,34 @@ module MutationAssessor
82
82
  result.sub! /^\t/, ''
83
83
  result.gsub! /\n\s*\d+\s*\t/s, "\n"
84
84
 
85
+ Log.medium "Mutation Assessor DONE."
86
+
85
87
  if result.empty?
86
88
  TSV.setup({}, :header_hash => "", :type => :list)
87
89
  else
88
- TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
90
+ res = TSV.open(StringIO.new(result), :header_hash => "", :type => :list)
91
+ res = res.slice((res.fields - ["Mapping issue"]))
92
+ res
89
93
  end
90
94
  end
91
95
 
92
- def self.chunked_predict(mutations)
93
- chunks = mutations.length.to_f / 1000
96
+ def self.chunked_predict(mutations, max = 1000)
97
+ flattened_mutations = mutations.collect{|g,list| list = [list] unless Array === list; list.collect{|m| [g,m] } }.flatten(1)
98
+ chunks = flattened_mutations.length.to_f / max
94
99
  chunks = chunks.ceil
95
- Misc.divide(mutations.sort_by{|m| m * ":"}, chunks).inject(nil) do |acc, list|
100
+
101
+ Log.debug("Mutation Assessor ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
102
+ num = 1
103
+ Misc.divide(flattened_mutations, chunks).inject(nil) do |acc, list|
104
+ Log.debug("Mutation Assessor ran with #{chunks} chunks: chunk #{num}") if chunks > 1
105
+ unflattened_mutations = {}
106
+ list.each{|g,m| next if g.nil?; unflattened_mutations[g] ||= []; unflattened_mutations[g] << m}
96
107
  if acc.nil?
97
- acc = predict(list)
108
+ acc = predict(unflattened_mutations)
98
109
  else
99
- acc = TSV.setup(acc.merge(predict(list)))
110
+ acc = TSV.setup(acc.merge(predict(unflattened_mutations)))
100
111
  end
112
+ num += 1
101
113
  acc
102
114
  end
103
115
  end
@@ -131,19 +143,15 @@ module MutationAssessor
131
143
 
132
144
  data.sort!
133
145
 
134
-
135
146
  predictions = {}
136
147
  predict(data).each{|uni_acc, values|
137
148
  protein, mutation = uni_acc.split(/\s+/)
138
149
 
139
- values = values.zip_fields
140
- values.each do |v|
141
- pred = v["Func. Impact"]
142
- predictions[protein] ||= {}
143
- predictions[protein][mutation] = pred
144
- end
150
+ pred = values["Func. Impact"]
151
+ predictions[protein] ||= {}
152
+ predictions[protein][mutation] = pred
145
153
  }
146
-
154
+
147
155
  uni_acc_pos = tsv.identify_field "UniProt/SwissProt ID"
148
156
  protein_field = tsv.identify_field "Protein Mutation"
149
157
 
@@ -169,11 +177,11 @@ module MutationAssessor
169
177
  "No Prediction"
170
178
  else
171
179
  list = []
172
- list = predictions[uni_acc][mutation] if predictions.include? uni_acc
173
- if list.nil?
180
+ pred = predictions[uni_acc][mutation] if predictions.include? uni_acc
181
+ if pred.nil?
174
182
  "No Prediction"
175
183
  else
176
- list.first
184
+ pred
177
185
  end
178
186
  end
179
187
  res
@@ -0,0 +1,146 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/tsv'
4
+ require 'digest/md5'
5
+ require 'rbbt/sources/organism'
6
+
7
+ module OncodriveFM
8
+
9
+ Rbbt.claim Rbbt.software.opt.OncodriveFM, :install, Rbbt.share.install.software.OncodriveFM.find
10
+
11
+
12
+ def self.process_cohort(cohort)
13
+
14
+ all_mutated_isoforms = cohort.metagenotype.mutated_isoforms.compact.flatten.uniq
15
+ nonsense = all_mutated_isoforms.select{|mi| mi.consequence == "MISS-SENSE"}
16
+
17
+ mutation_assessor = MutEval.job(:mutation_assessor, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
18
+ sift = MutEval.job(:sift, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
19
+ polyphen = MutEval.job(:polyphen, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
20
+
21
+ mutation_assessor_max = mutation_assessor.slice("Mutation Assessor Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
22
+ sift_max = sift.slice("SIFT Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
23
+ polyphen_max = polyphen.slice("Polyphen Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
24
+
25
+ mutation_file = []
26
+ cohort.each do |genotype|
27
+ sample = genotype.jobname
28
+ genotype.each do |mutation|
29
+ genes = mutation.genes
30
+ next if genes.empty?
31
+ mut_mis = mutation.mutated_isoforms
32
+ next if mut_mis.nil? or mut_mis.empty?
33
+ genes.each do |gene|
34
+ mis = mut_mis.select{|mi| mi.protein and mi.protein.gene == gene}
35
+
36
+ mutation_assessor.values_at(*mis)
37
+ ma_score = mutation_assessor.values_at(*mis).compact.collect{|v| v["Mutation Assessor Score"]}.first
38
+ sift_score = sift.values_at(*mis).compact.collect{|v| v["SIFT Score"]}.first
39
+ polyphen_score = polyphen.values_at(*mis).compact.collect{|v| v["Polyphen Score"]}.first
40
+
41
+ ma_score = mutation_assessor_max if mis.select{|mi| mi.truncated}.any?
42
+ sift_score = sift_max if mis.select{|mi| mi.truncated}.any?
43
+ polyphen_score = polyphen_max if mis.select{|mi| mi.truncated}.any?
44
+
45
+ mutation_file << [gene, sift_score || "NA", polyphen_score || "NA", ma_score || "NA", sample] * "\t"
46
+ end
47
+ end
48
+ end
49
+
50
+ TmpFile.with_file(mutation_file * "\n") do |fmuts|
51
+ TmpFile.with_file do |outdir|
52
+ FileUtils.mkdir_p outdir unless File.exists? outdir
53
+ name = "Tumor"
54
+
55
+ TmpFile.with_file(config(fmuts, outdir, "[TUMOR]" => name)) do |fconf|
56
+ puts Open.read(fconf)
57
+ CMD.cmd("cd #{Rbbt.software.opt.OncodriveFM.bin.find}; ./pipeline_launcher.pl '#{fconf}'").read
58
+ end
59
+
60
+ outfile = File.join(outdir, name + '.fimp')
61
+ text = Open.read(outfile).gsub(/WARNING.*?\n/m,'').gsub(/\t-\t/,"\t\t").gsub(/\t-$/,"\t")
62
+ tsv = TSV.open(StringIO.new(text), :type => :list)
63
+ tsv.key_field = "Ensembl Gene ID"
64
+ tsv.fields = ["Associated Gene Name", "Sample count", "p-value", "unknown"]
65
+
66
+ tsv
67
+ end
68
+ end
69
+
70
+ end
71
+
72
+ CONFIG_TEMPLATE=<<-EOF
73
+ ###########################################################################################
74
+ # Input data specific for the tumor under analysis
75
+
76
+ #tumor: This name will be used as prefix to name all intermediate and final pipeline files
77
+ tumor='[TUMOR]'
78
+
79
+ #mutfile: File that contains the mutations data of the tumor you want to analyze. Each row corresponds to the mutation of one gene in one sample. Its format should be:
80
+ #
81
+ ####Ensembl_Gene_ID MA_Zscore CHASM_Zscore Sample_ID
82
+ mutfile='[MUTFILE]'
83
+
84
+ ####numFIS: number of functional scores included in the mutations file and used to compute the functional impact bias
85
+ numFIS='[NUMFIS]'
86
+
87
+ ###########################################################################################
88
+
89
+ ###########################################################################################
90
+ # Common input data (change these only if you have downloaded different info files)
91
+
92
+ #genes2gos: File that contains the genes2gos mapping
93
+ genes2gos='[DATA_DIR]/common/slimgos_distrib/genes2gos'
94
+
95
+ #gosdistribs: Directory with the files that contain the distributions of SIFT, PPH2 and MA scores for each slimGOA obtained from 1000genomes.
96
+ gosdistribs='[DATA_DIR]/common/slimgos_distrib/'
97
+
98
+ #genes2symbols: File that contains the genes2symbols mapping obtained from BioMart. Its format should be:
99
+ #
100
+ ####Ensembl_Gene_ID Gene_Symbol
101
+ genes2symbols='[DATA_DIR]/common/genes2symbols.txt'
102
+
103
+ extrec='NONE'
104
+
105
+ #genes2probes: File that contains the genes2probes mapping obtained from BioMart. Its format should be:
106
+ #
107
+ ####Ensembl_Gene_ID Probe_ID
108
+ cp='[DATA_DIR]/common/cp.format'
109
+
110
+ #genesattr: File that contains genes' longest CDS' lengths obtained from BioMart and genes' basal nsSNVs rates computed from 1000genomes. This are used to assess the statistical significance of genes' mutations recurrence and genes' overmutation rates. Its format should be:
111
+ #
112
+ ####Ensembl_Gene_ID Longest_CDS_length Basal_nsSNVs_rate
113
+ genesattr='[DATA_DIR]/common/ensgenes_cds.recurrence'
114
+
115
+ #outdir: Directory to write output files
116
+ outdir='[OUTDIR]'
117
+
118
+ #tmpdir: Directory to write intermediate files
119
+ tmpdir='[TMPDIR]'
120
+
121
+ #internal: whether the null distribution will be taken from variants observed in the tumor
122
+ internal='[INTERNAL]'
123
+ ###########################################################################################
124
+ EOF
125
+
126
+ def self.config(mutfile, outdir, options = {})
127
+ options = Misc.add_defaults options,
128
+ "[TUMOR]" => "Tumor",
129
+ "[MUTFILE]" => mutfile,
130
+ "[NUMFIS]" => 3,
131
+ "[DATA_DIR]" => Rbbt.software.opt.OncodriveFM.data.find,
132
+ "[OUTDIR]" => outdir,
133
+ "[TMPDIR]" => Rbbt.tmp.OncodriveFM.find,
134
+ "[INTERNAL]" => 1
135
+
136
+ FileUtils.mkdir_p options["[TMPDIR]"] unless File.exists? options["[TMPDIR]"]
137
+
138
+ txt = CONFIG_TEMPLATE.dup
139
+ options.each do |key,value|
140
+ txt.gsub!(key, value.to_s)
141
+ end
142
+
143
+ txt
144
+ end
145
+
146
+ end
@@ -31,52 +31,81 @@ module Polyphen2
31
31
  "_ggi_target_manage" => "Refresh",
32
32
  }
33
33
 
34
- def self.predict(query)
35
- options = OPTIONS.merge "_ggi_batch" => query
34
+ def self.predict(query)
35
+ options = OPTIONS.merge "_ggi_batch" => query
36
36
 
37
- desc = Digest::MD5.hexdigest(options.inspect)
38
- options["description"] = desc
37
+ desc = Digest::MD5.hexdigest(options.inspect)
38
+ options["description"] = desc
39
39
 
40
- doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
40
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
41
41
 
42
- sid = doc.css('input[name=sid]').attr('value')
42
+ sid = doc.css('input[name=sid]').attr('value')
43
43
 
44
- options = REFRESH_OPTIONS.merge "sid" => sid
45
- finished = false
44
+ options = REFRESH_OPTIONS.merge "sid" => sid
45
+ finished = false
46
46
 
47
- view_link = nil
48
- while not finished do
49
- doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
47
+ view_link = nil
48
+ while not finished do
49
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
50
50
 
51
- result_table = doc.css('body > table')[1].css('table')[2]
51
+ result_table = doc.css('body > table')[1].css('table')[2]
52
52
 
53
- rows = result_table.css('tr')
53
+ rows = result_table.css('tr')
54
54
 
55
- row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
55
+ row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
56
56
 
57
- cells = row.css('td')
58
- if cells[2].content =~ /Error/
59
- view_link = nil
60
- break
61
- end
57
+ cells = row.css('td')
58
+ if cells[2].content =~ /Error/
59
+ view_link = nil
60
+ break
61
+ end
62
62
 
63
- if cells[1].content =~ /Short/
64
- view_link = cells[1].css('a').attr('href')
65
- break
63
+ if cells[1].content =~ /Short/
64
+ view_link = cells[1].css('a').attr('href')
65
+ break
66
+ end
67
+
68
+ sleep 5
66
69
  end
67
70
 
68
- sleep 3
69
- end
71
+ return nil if view_link.nil?
70
72
 
71
- return nil if view_link.nil?
73
+ tsv = TSV.open Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :merge => true, :fix => Proc.new{|l| l.gsub(/ *\t */, "\t")}
74
+ tsv.fields = tsv.fields.collect{|f| f.strip}
75
+ tsv.key_field = tsv.key_field.strip
72
76
 
73
- tsv = TSV.open Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :merge => true, :fix => Proc.new{|l| l.gsub(/ *\t */, "\t")}
74
- tsv.fields = tsv.fields.collect{|f| f.strip}
75
- tsv.key_field = tsv.key_field.strip
77
+ new_tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => tsv.fields)
76
78
 
77
- return tsv
78
- end
79
+ tsv.through do |acc, values|
80
+ values.zip_fields.each do |v|
81
+ pos, wt, mt = v.values_at "o_pos", "o_aa1", "o_aa2"
82
+ key = [acc, [wt,pos,mt] * "" ] * ":"
83
+ new_tsv[key] = v
84
+ end
85
+ end
79
86
 
87
+ return new_tsv
88
+ end
89
+
90
+ def self.chunked_predict(query, max = 1000)
91
+ mutations = query.split("\n")
92
+ chunks = mutations.length.to_f / max
93
+ chunks = chunks.ceil
94
+
95
+ num = 0
96
+ Log.debug("Polyphen2 ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
97
+ Misc.divide(mutations, chunks).inject(nil) do |acc, list|
98
+ num += 1
99
+ Log.debug("Polyphen2 ran with #{chunks} chunks: chunk #{num}") if chunks > 1
100
+ list = list * "\n"
101
+ if acc.nil?
102
+ acc = predict(list)
103
+ else
104
+ acc = TSV.setup(acc.merge(predict(list)))
105
+ end
106
+ acc
107
+ end
108
+ end
80
109
 
81
110
  end
82
111
 
@@ -9,9 +9,14 @@ module SIFT
9
9
  data_str = mutations.collect{|mut| mut.sub(':', ',')}.uniq * "\n"
10
10
  doc = Nokogiri::HTML(Open.read(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"}))
11
11
 
12
+ if doc.to_s.match(/Your computer has exceeded its daily limit/)
13
+ Open.clean_cache(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"})
14
+ raise "Daily limit reached"
15
+ end
16
+
12
17
  rows = []
13
18
  doc.css('tr').each do |row|
14
- rows << row.css('td').collect{|cell| cell.content.strip.sub "\302\240\302\240&nbsp", ""}
19
+ rows << row.css('td').collect{|cell| content = cell.content.strip; content.sub(/\s*&nbsp.*/, "").sub(/[^\w,]*$/,'')}
15
20
  end
16
21
 
17
22
  rows.shift
@@ -24,12 +29,19 @@ module SIFT
24
29
  end
25
30
  end
26
31
 
27
- def self.chunked_predict(mutations)
28
- chunks = mutations.length.to_f / 100
32
+ def self.chunked_predict(mutations, max = 500)
33
+ chunks = mutations.length.to_f / max
29
34
  chunks = chunks.ceil
35
+
36
+ Log.debug("SIFT ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
37
+
30
38
  tsv = TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
39
+ num = 1
31
40
  Misc.divide(mutations.uniq.sort, chunks).inject(tsv) do |acc, list|
32
- acc = TSV.setup(acc.merge(predict(list)))
41
+ Log.debug("SIFT ran with #{chunks} chunks: chunk #{num}") if chunks > 1
42
+ acc = TSV.setup(acc.merge(predict(list)))
43
+ num + 1
44
+ acc
33
45
  end
34
46
  end
35
47
 
@@ -14,7 +14,7 @@ module SNPSandGO
14
14
 
15
15
  res = Open.read(url)
16
16
 
17
- raise "Error in prediction" unless res =~ /RESULTS/
17
+ raise "Error in prediction: #{$1}" if res =~ /ERROR: (.*)/
18
18
 
19
19
  res.match(/Position\s+WT\s+NEW\s+Effect\s+RI\n\s+\d+\s+[A-Z]\s+[A-Z]\s+(\w+)\s+(\d+)/).values_at 1,2
20
20
  end
@@ -0,0 +1,97 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/tsv'
4
+ require 'nokogiri'
5
+ require 'digest/md5'
6
+ require 'rest_client'
7
+ require 'rbbt/sources/organism'
8
+
9
+ module TransFIC
10
+
11
+ class NotDone < StandardError; end
12
+
13
+ URL="http://bg.upf.edu/transfic/taskService"
14
+ ASTERISK = "*"[0]
15
+
16
+ # mutations is a hash of genes in Uniprot protein accession pointing to lists
17
+ # of aminoacid substitutions
18
+ def self.predict(mutations)
19
+ options = {}
20
+ ensp2uni = Organism.identifiers("Hsa").index :target => "UniProt/SwissProt ID", :fields => "Ensembl Protein ID", :persist => true
21
+ searchText = mutations.collect{|mutation| protein, change = mutation.split(":"); next if ensp2uni[protein].nil?; [ensp2uni[protein], change] * "\t"}.compact.uniq * "\n"
22
+
23
+ Log.debug "Querying TransFIC for: #{mutations.length} mutations"
24
+
25
+ TmpFile.with_file(searchText) do |file|
26
+ test_url = CMD.cmd("curl -X PUT -T '#{ file }' '#{ URL }'").read
27
+
28
+ result = nil
29
+
30
+ begin
31
+ Misc.insist(5) do
32
+ result = CMD.cmd("curl -X GET '#{ test_url }'").read
33
+
34
+ raise result.split("\n").select{|line| line =~ /Error/}.first if result =~ /Error/
35
+
36
+ while result =~ /executing/
37
+ sleep 10
38
+ result = CMD.cmd("curl -X GET '#{ test_url }'").read
39
+ end
40
+
41
+ raise result.split("\n").select{|line| line =~ /Error/}.first if result =~ /Error/
42
+ end
43
+ rescue
44
+ if $!.message =~ /validating/
45
+ Log.debug(Open.read(file))
46
+ end
47
+ raise $!
48
+ end
49
+
50
+ Log.medium("TransFIC DONE")
51
+
52
+ tsv = TSV.setup({}, :key_field => "Protein Mutation", :fields => %w(siftTransfic siftTransficLabel pph2Transfic pph2TransficLabel maTransfic maTransficLabel), :type => :list)
53
+ result.split("\n").each do |line|
54
+ next if line[0] == "#"[0]
55
+
56
+ id, hgnc, hgncdesc, transcript, ensp, sw, protein_position, amino_acids, sift, polyphen, mass,
57
+ siftTransfic, siftTransficLabel, pph2Transfic, pph2TransficLabel, maTransfic, maTransficLabel = line.split("\t")
58
+
59
+ change = [amino_acids.split("/").first, protein_position, amino_acids.split("/").last] * ""
60
+ mutation = [ensp,change] * ":"
61
+
62
+ tsv[mutation] = [siftTransfic, siftTransficLabel, pph2Transfic, pph2TransficLabel, maTransfic, maTransficLabel]
63
+ end
64
+
65
+ tsv.select(mutations)
66
+ end
67
+ end
68
+
69
+ def self.chunked_predict(mutations, max = 1000)
70
+ chunks = mutations.length.to_f / max
71
+ chunks = chunks.ceil
72
+
73
+ Log.debug("TransFIC ran with #{chunks} chunks of #{ max } mutations") if chunks > 1
74
+ num = 1
75
+ Misc.divide(mutations, chunks).inject(nil) do |acc, list|
76
+ Log.debug("TransFIC ran with #{chunks} chunks: chunk #{num}") if chunks > 1
77
+ begin
78
+ result = predict(list)
79
+ rescue
80
+ if list.length > 2
81
+ Log.debug("Error predicting in transFIC. Divinding list of size #{list.length}")
82
+ result = chunked_predict(list, list.length / 2)
83
+ else
84
+ Log.debug("Error predicting in transFIC. Single error detected")
85
+ next
86
+ end
87
+ end
88
+ if acc.nil?
89
+ acc = result
90
+ else
91
+ acc = TSV.setup(acc.merge(result))
92
+ end
93
+ num += 1
94
+ acc
95
+ end
96
+ end
97
+ end
@@ -10,30 +10,35 @@ module KEGG
10
10
  KEGG.claim KEGG.root.find, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
11
11
 
12
12
  def self.names
13
- @@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single
13
+ @@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
14
14
  end
15
15
 
16
16
  def self.descriptions
17
- @@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single).tap{|o| o.unnamed = true}
17
+ @@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
18
18
  end
19
19
 
20
20
 
21
21
  def self.index2genes
22
- @@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true).tap{|o| o.unnamed = true}
22
+ @@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
23
23
  end
24
24
 
25
25
  def self.index2ens
26
- @@index2ens ||= KEGG.identifiers.index(:persist => true).tap{|o| o.unnamed = true}
26
+ @@index2ens ||= KEGG.identifiers.index(:persist => true)
27
27
  end
28
28
 
29
29
  def self.index2kegg
30
- @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true).tap{|o| o.unnamed = true}
30
+ @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
31
31
  end
32
32
 
33
33
  def self.id2name(id)
34
34
  names[id]
35
35
  end
36
36
 
37
+ def self.name2id(name)
38
+ names.select{|id,n| n.downcase.index(name.downcase) == 0}.collect{|id,n| id} rescue []
39
+ end
40
+
41
+
37
42
  def self.description(id)
38
43
  descriptions[id]
39
44
  end
@@ -60,6 +65,7 @@ if defined? Entity
60
65
  name = KEGG.id2name(self)
61
66
  name.sub(/ - Homo.*/,'') unless name.nil?
62
67
  end
68
+ persist :name
63
69
 
64
70
  property :description => :single2array do
65
71
  KEGG.description(self)
@@ -67,9 +73,10 @@ if defined? Entity
67
73
 
68
74
  property :genes => :array2single do |*args|
69
75
  organism = args.first || self.organism
70
- @genes ||= KEGG.index2genes.values_at(*self).
71
- each{|pth| pth.organism = organism if pth.respond_to? :organism }
76
+ KEGG.index2genes.values_at(*self).
77
+ each{|gene| gene.organism = organism if gene.respond_to? :organism }
72
78
  end
79
+ persist :genes
73
80
  end
74
81
 
75
82
  if defined? Gene and Entity === Gene
@@ -85,50 +92,42 @@ if defined? Entity
85
92
  end
86
93
  end
87
94
 
88
- def _from_kegg
89
- return self.clean_annotations unless format == "KEGG Gene ID"
95
+ def from_kegg
96
+ return self unless format == "KEGG Gene ID"
90
97
  if Array === self
91
- KEGG.index2ens.values_at(*self)
98
+ Gene.setup KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism
92
99
  else
93
- KEGG.index2ens[self]
100
+ Gene.setup KEGG.index2ens[self], "Ensembl Gene ID", organism
94
101
  end
95
102
  end
96
103
 
97
- def from_kegg
98
- return self unless format == "KEGG Gene ID"
99
- Gene.setup(_from_kegg, "Ensembl Gene ID", organism)
104
+ def self.gene_kegg_pathway_index
105
+ @@gene_kegg_pathway_index ||=
106
+ KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
100
107
  end
101
108
 
102
- property :_to => :array2single do |new_format|
103
- return self if format == new_format
104
- list = self._from_kegg
105
-
106
- tsv = Translation.job(:tsv_translate, "", :organism => organism, :genes => list, :format => new_format).exec.tap{|o| o.unnamed = true}
107
-
108
- tsv.values_at(*list)
109
- end
110
-
111
- property :to! => :array2single do |new_format|
112
- return self if format == new_format
113
-
114
- new = _to(new_format)
115
- new.each_with_index do |n,i|
116
- c = self.annotated_array_clean_get_brackets(i)
117
- if c.nil? or n.nil?
118
- self[i] = nil
119
- else
120
- c.replace n
121
- end
109
+ property :to => :array2single do |new_format|
110
+ case
111
+ when format == new_format
112
+ self
113
+ when format == "KEGG Gene ID"
114
+ ensembl = from_kegg.clean_annotations
115
+ Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.values_at(*ensembl), new_format, organism)
116
+ when new_format == "KEGG Gene ID"
117
+ to_kegg
118
+ else
119
+ Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
122
120
  end
123
121
  end
122
+ persist :to
124
123
 
125
- property :to => :array2single do |new_format|
126
- return self if format == new_format
127
- Gene.setup(_to(new_format), new_format, organism)
128
- end
124
+ #property :to => :array2single do |new_format|
125
+ # return self if format == new_format
126
+ # to!(new_format).collect!{|v| Array === v ? v.first : v}
127
+ #end
129
128
 
130
129
  property :kegg_pathways => :array2single do
131
- @kegg_pathways ||= KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true).values_at(*self.to_kegg).
130
+ @kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
132
131
  each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
133
132
  end
134
133
  end
@@ -5,5 +5,5 @@ module PharmaGKB
5
5
  self.pkgdir = "phgx"
6
6
  self.subdir = "share/pharmagkb"
7
7
 
8
- PharmaGKB.claim PharmaGKB.root.find, :rake, Rbbt.share.install.PharmaGKB.Rakefile.find(:lib)
8
+ PharmaGKB.claim PharmaGKB.root, :rake, Rbbt.share.install.PharmaGKB.Rakefile.find(:lib)
9
9
  end
@@ -7,3 +7,29 @@ module Pina
7
7
 
8
8
  Pina.claim Pina.root.find, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
9
9
  end
10
+
11
+ if defined? Entity and defined? Gene and Entity === Gene
12
+ require 'rbbt/entity/gene'
13
+ require 'rbbt/entity/interactor'
14
+ require 'rbbt/sources/PSI_MI'
15
+
16
+ module Gene
17
+ property :pina_interactors => :array2single do
18
+ ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
19
+ pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
20
+
21
+ int = self.ensembl.collect do |ens|
22
+ uniprot = ens2uniprot[ens]
23
+ list = pina.values_at(*uniprot).compact.collect do |v|
24
+ Misc.zip_fields(v).collect do |o, method, articles|
25
+ Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
26
+ end
27
+ end.flatten.uniq
28
+ Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
29
+ end
30
+
31
+ Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
32
+ end
33
+ end
34
+ end
35
+
@@ -7,3 +7,22 @@ module STRING
7
7
 
8
8
  STRING.claim STRING.root.find, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
9
9
  end
10
+
11
+ if defined? Entity and defined? Gene and Entity === Gene
12
+ module Gene
13
+ property :string_interactors => :array2single do |*args|
14
+ threshold = args.first || 800
15
+ string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
16
+ all = self.ensembl.collect do |gene|
17
+ interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq
18
+ Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
19
+ end
20
+
21
+ all.compact.first.annotate all if Annotated === all.compact.first
22
+
23
+ all
24
+ end
25
+ #persist :_ary_string_interactors
26
+ end
27
+ end
28
+
@@ -1,8 +1,8 @@
1
1
  require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
2
 
3
- define_source_tasks "Homo sapiens-20110225.txt" => "http://csbi.ltdk.helsinki.fi/pina/download/Homo%20sapiens-20110225.txt"
3
+ define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
4
4
 
5
- process_tsv :protein_protein, 'Homo sapiens-20110225.txt',
5
+ process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
6
6
  :key => 0,
7
7
  :fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
8
8
  :fields => [1,6,8],
@@ -1,6 +1,6 @@
1
1
  require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
2
 
3
- define_source_tasks "protein_protein" => "http://string-db.org:8080/newstring_download/protein.links.v8.3.txt.gz"
3
+ define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.0.txt.gz"
4
4
 
5
5
  process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
6
6
  headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
@@ -0,0 +1,13 @@
1
+ #!/bin/bash
2
+
3
+ INSTALL_HELPER_FILE="$1"
4
+ RBBT_SOFTWARE_DIR="$2"
5
+
6
+ source "$INSTALL_HELPER_FILE"
7
+
8
+ name='OncodriveFM'
9
+ url="http://bg.upf.edu/group/projects/oncodrivefm-1.1.0.tar.gz"
10
+
11
+
12
+ install_src "$name" "$url"
13
+
@@ -5,7 +5,7 @@ class TestMutationAssessor < Test::Unit::TestCase
5
5
 
6
6
  def test_predict_aminoacid_mutation
7
7
  mutations = {
8
- "EGFR_HUMAN" => %w(R521K)
8
+ "EGFR_HUMAN" => %w(R521E)
9
9
  }
10
10
 
11
11
  assert_equal 1, MutationAssessor.predict(mutations).length
@@ -27,9 +27,5 @@ class TestMutationAssessor < Test::Unit::TestCase
27
27
 
28
28
  assert(MutationAssessor.chunked_predict(mutations).include? "EGFR_HUMAN R521K")
29
29
  end
30
-
31
-
32
-
33
-
34
30
  end
35
31
 
@@ -0,0 +1,13 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/mutation/oncodriveFM'
3
+
4
+ class TestOncodriveFM < Test::Unit::TestCase
5
+
6
+ def test_CLL
7
+ require 'rbbt/workflow'
8
+ Workflow.require_workflow "StudyExplorer"
9
+ s = Study.setup("CLL")
10
+ puts OncodriveFM.process_cohort(s.cohort).select("p-value"){|v| not v.empty? and v.to_f < 0.05}
11
+ end
12
+ end
13
+
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helpe
2
2
  require 'rbbt/mutation/polyphen'
3
3
 
4
4
  class TestPolyphen2 < Test::Unit::TestCase
5
- def test_predict_disease
5
+ def _test_predict_disease
6
6
  accession = "A6NFZ4"
7
7
  mutation = "Y34D"
8
8
 
@@ -11,10 +11,12 @@ class TestPolyphen2 < Test::Unit::TestCase
11
11
 
12
12
  def test_batch
13
13
  query =<<-EOF
14
- A6NFZ4 Y34D
14
+ A6NFZ4 34 Y D
15
15
  EOF
16
16
 
17
- assert_equal "probably damaging", Polyphen2::Batch.predict(query)["A6NFZ4_Y34D"]["prediction"].first
17
+ ddd Polyphen2::Batch.predict(query)["A6NFZ4:Y34D"]
18
+ assert_equal "probably damaging", Polyphen2::Batch.predict(query)["A6NFZ4:Y34D"]["prediction"]
19
+ assert_equal "probably damaging", Polyphen2::Batch.chunked_predict(query)["A6NFZ4:Y34D"]["prediction"]
18
20
  end
19
21
  end
20
22
 
@@ -0,0 +1,14 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/mutation/transFIC'
3
+
4
+ class TestTransFIC < Test::Unit::TestCase
5
+
6
+ def test_predict_aminoacid_mutation
7
+ mutations = [
8
+ "ENSP00000275493:R521K"
9
+ ]
10
+ puts TransFIC.predict(mutations)
11
+ end
12
+
13
+ end
14
+
@@ -0,0 +1,20 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/entity/gene'
5
+ require 'rbbt/sources/kegg'
6
+
7
+ class TestKEGG < Test::Unit::TestCase
8
+ def test_kegg_gene
9
+ organism = "Hsa"
10
+ gene = Gene.setup "TP53", "Associated Gene Name", organism
11
+
12
+ assert_equal gene.organism, gene.to_kegg.from_kegg.organism
13
+ assert_equal "KEGG Gene ID", gene.to_kegg.format
14
+ assert_equal organism, gene.to_kegg.organism
15
+ assert_equal gene.ensembl, gene.to_kegg.ensembl
16
+ assert_equal gene.name, gene.to_kegg.ensembl.name
17
+ assert_equal gene.to_kegg.ensembl.name, gene.to_kegg.name
18
+ end
19
+ end
20
+
@@ -3,7 +3,4 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
 
5
5
  class Test::Unit::TestCase
6
- def test_datafile(file)
7
- File.join(File.dirname(__FILE__), 'data', file)
8
- end
9
6
  end
metadata CHANGED
@@ -1,64 +1,55 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbbt-phgx
3
- version: !ruby/object:Gem::Version
4
- hash: 23
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
5
  prerelease:
6
- segments:
7
- - 1
8
- - 0
9
- - 0
10
- version: 1.0.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Miguel Vazquez
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-01-13 00:00:00 +01:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
22
15
  name: rbbt-util
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
25
17
  none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- hash: 63
30
- segments:
31
- - 4
32
- - 0
33
- - 0
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
34
21
  version: 4.0.0
35
22
  type: :runtime
36
- version_requirements: *id001
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 4.0.0
37
30
  description: Pharmaco-genomics related data sources
38
31
  email: miguel.vazquez@fdi.ucm.es
39
32
  executables: []
40
-
41
33
  extensions: []
42
-
43
- extra_rdoc_files:
34
+ extra_rdoc_files:
44
35
  - LICENSE
45
- files:
36
+ files:
46
37
  - LICENSE
47
38
  - lib/phgx.rb
48
39
  - lib/rbbt/mutation/fireDB.rb
49
40
  - lib/rbbt/mutation/mutation_assessor.rb
41
+ - lib/rbbt/mutation/oncodriveFM.rb
50
42
  - lib/rbbt/mutation/polyphen.rb
51
43
  - lib/rbbt/mutation/sift.rb
52
44
  - lib/rbbt/mutation/snps_and_go.rb
45
+ - lib/rbbt/mutation/transFIC.rb
53
46
  - lib/rbbt/sources/biogrid.rb
54
47
  - lib/rbbt/sources/cancer.rb
55
48
  - lib/rbbt/sources/dbsnp.rb
56
- - lib/rbbt/sources/hprd.rb
57
49
  - lib/rbbt/sources/kegg.rb
58
50
  - lib/rbbt/sources/matador.rb
59
51
  - lib/rbbt/sources/pharmagkb.rb
60
52
  - lib/rbbt/sources/pina.rb
61
- - lib/rbbt/sources/reactome.rb
62
53
  - lib/rbbt/sources/stitch.rb
63
54
  - lib/rbbt/sources/string.rb
64
55
  - share/Cancer/anais_annotations
@@ -66,68 +57,63 @@ files:
66
57
  - share/Cancer/cancer_genes.tsv
67
58
  - share/install/Biogrid/Rakefile
68
59
  - share/install/DBSNP/Rakefile
69
- - share/install/HPRD/Rakefile
70
60
  - share/install/KEGG/Rakefile
71
61
  - share/install/Matador/Rakefile
72
62
  - share/install/NCI/Rakefile
73
63
  - share/install/PharmaGKB/Rakefile
74
64
  - share/install/Pina/Rakefile
75
- - share/install/Reactome/Rakefile
76
65
  - share/install/STITCH/Rakefile
77
66
  - share/install/STRING/Rakefile
78
67
  - share/install/lib/rake_helper.rb
68
+ - share/install/software/OncodriveFM
79
69
  - test/rbbt/sources/test_matador.rb
80
70
  - test/rbbt/sources/test_pharmagkb.rb
81
71
  - test/rbbt/sources/test_stitch.rb
82
72
  - test/rbbt/sources/test_cancer.rb
73
+ - test/rbbt/sources/test_kegg.rb
83
74
  - test/rbbt/mutation/test_snps_and_go.rb
84
75
  - test/rbbt/mutation/test_fireDB.rb
85
76
  - test/rbbt/mutation/test_sift.rb
86
77
  - test/rbbt/mutation/test_polyphen.rb
87
78
  - test/rbbt/mutation/test_mutation_assessor.rb
79
+ - test/rbbt/mutation/test_oncodriveFM.rb
80
+ - test/rbbt/mutation/test_transFIC.rb
88
81
  - test/test_helper.rb
89
- has_rdoc: true
90
82
  homepage: http://github.com/mikisvaz/rbbt-phgx
91
83
  licenses: []
92
-
93
84
  post_install_message:
94
85
  rdoc_options: []
95
-
96
- require_paths:
86
+ require_paths:
97
87
  - lib
98
- required_ruby_version: !ruby/object:Gem::Requirement
88
+ required_ruby_version: !ruby/object:Gem::Requirement
99
89
  none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- hash: 3
104
- segments:
105
- - 0
106
- version: "0"
107
- required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
108
95
  none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- hash: 3
113
- segments:
114
- - 0
115
- version: "0"
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
116
100
  requirements: []
117
-
118
101
  rubyforge_project:
119
- rubygems_version: 1.6.2
102
+ rubygems_version: 1.8.24
120
103
  signing_key:
121
104
  specification_version: 3
122
105
  summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
123
- test_files:
106
+ test_files:
124
107
  - test/rbbt/sources/test_matador.rb
125
108
  - test/rbbt/sources/test_pharmagkb.rb
126
109
  - test/rbbt/sources/test_stitch.rb
127
110
  - test/rbbt/sources/test_cancer.rb
111
+ - test/rbbt/sources/test_kegg.rb
128
112
  - test/rbbt/mutation/test_snps_and_go.rb
129
113
  - test/rbbt/mutation/test_fireDB.rb
130
114
  - test/rbbt/mutation/test_sift.rb
131
115
  - test/rbbt/mutation/test_polyphen.rb
132
116
  - test/rbbt/mutation/test_mutation_assessor.rb
117
+ - test/rbbt/mutation/test_oncodriveFM.rb
118
+ - test/rbbt/mutation/test_transFIC.rb
133
119
  - test/test_helper.rb
@@ -1,6 +0,0 @@
1
- require 'phgx'
2
-
3
- module HPRD
4
- extend Resource
5
- data_module PhGx
6
- end
@@ -1,6 +0,0 @@
1
- require 'phgx'
2
-
3
- module Reactome
4
- extend Resource
5
- data_module PhGx
6
- end
@@ -1,15 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
-
4
- file :protein_protein do |t|
5
- begin
6
- tsv = PhGx.share.hprd["BINARY_PROTEIN_PROTEIN_INTERACTIONS.txt"].tsv :merge => true
7
- rescue
8
- raise "File BINARY_PROTEIN_PROTEIN_INTERACTIONS.txt not found in 'share/hprd', download manually from http://www.hprd.org/"
9
- end
10
- tsv.key_field = "Associated Gene Name 1"
11
- tsv.fields = ["HPRD id 1","RefSeq Protein ID 1","Associated Gene Name 2","HPRD id 2","RefSeq Protein ID 2", "Experiment type", "PMID"]
12
- tsv.namespace = "Hsa"
13
-
14
- Open.write(t.name, tsv.to_s)
15
- end
@@ -1,36 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
-
3
- define_source_tasks "human_ppi" => "http://www.genomeknowledge.org/download/current/homo_sapiens.interactions.txt.gz",
4
- "protein_pathway" => "http://www.genomeknowledge.org/download/current/uniprot_2_pathways.stid.txt",
5
- "pathway_genesets" => "http://www.genomeknowledge.org/download/current/ReactomePathways.gmt.zip"
6
-
7
- process_tsv :protein_protein, 'human_ppi',
8
- :key => 0,
9
- :fix => lambda{|l| l.gsub(/\t[a-z ]+:/i,"\t").gsub(/^[a-z ]+:/i,'')},
10
- :fields => [3,6,7,8],
11
- :header_hash => "#",
12
- :merge => true,
13
- :keep_empty => true do
14
-
15
- headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Interaction Type', 'Reactions Involved', 'Interaction PMIDS']
16
- end
17
-
18
- process_tsv :protein_pathway, 'protein_pathway',
19
- :key => 0,
20
- :fix => lambda{|l| l.gsub(/\t[a-z ]+:/i,"\t").gsub(/^[a-z ]+:/i,'')},
21
- :fields => [1,2],
22
- :header_hash => "#",
23
- :merge => true,
24
- :keep_empty => true do
25
-
26
- headers ['UniProt/SwissProt Accession', 'Pathway ID', 'Pathway Description']
27
- end
28
-
29
- process_tsv :pathway_genesets, 'pathway_genesets',
30
- :key => 0,
31
- :fix => lambda{|l| parts = l.split("\t"); [parts[0], parts[2..-1] * "|"] * "\t"},
32
- :keep_empty => true do
33
-
34
- headers ['Reactome Pathway Name', 'Associated Gene Name']
35
- end
36
-