rbbt-phgx 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/lib/phgx.rb +2 -11
  2. data/lib/rbbt/mutation/fireDB.rb +11 -0
  3. data/lib/rbbt/mutation/polyphen.rb +172 -0
  4. data/lib/rbbt/mutation/sift.rb +112 -0
  5. data/lib/rbbt/mutation/snps_and_go.rb +61 -0
  6. data/lib/rbbt/sources/biogrid.rb +11 -0
  7. data/lib/rbbt/sources/cancer.rb +3 -8
  8. data/lib/rbbt/sources/dbsnp.rb +6 -0
  9. data/lib/rbbt/sources/hprd.rb +6 -0
  10. data/lib/rbbt/sources/kegg.rb +2 -5
  11. data/lib/rbbt/sources/matador.rb +2 -5
  12. data/lib/rbbt/sources/nci.rb +2 -5
  13. data/lib/rbbt/sources/pharmagkb.rb +2 -5
  14. data/lib/rbbt/sources/pina.rb +6 -0
  15. data/lib/rbbt/sources/reactome.rb +6 -0
  16. data/lib/rbbt/sources/sift.rb +5 -0
  17. data/lib/rbbt/sources/stitch.rb +2 -5
  18. data/lib/rbbt/sources/string.rb +2 -5
  19. data/share/Cancer/anais_annotations +7949 -0
  20. data/share/Cancer/anais_interactions +3402 -0
  21. data/share/Cancer/cancer_genes.tsv +428 -0
  22. data/share/install/Biogrid/Rakefile +20 -0
  23. data/share/install/DBSNP/Rakefile +50 -0
  24. data/share/install/HPRD/Rakefile +15 -0
  25. data/share/install/KEGG/Rakefile +3 -3
  26. data/share/install/Matador/Rakefile +2 -2
  27. data/share/install/NCI/Rakefile +2 -2
  28. data/share/install/PharmaGKB/Rakefile +17 -14
  29. data/share/install/Pina/Rakefile +16 -0
  30. data/share/install/Reactome/Rakefile +36 -0
  31. data/share/install/STITCH/Rakefile +5 -5
  32. data/share/install/STRING/Rakefile +2 -2
  33. data/test/rbbt/mutation/test_fireDB.rb +14 -0
  34. data/test/rbbt/mutation/test_polyphen.rb +20 -0
  35. data/test/rbbt/mutation/test_sift.rb +23 -0
  36. data/test/rbbt/mutation/test_snps_and_go.rb +32 -0
  37. data/test/rbbt/sources/test_cancer.rb +1 -1
  38. data/test/rbbt/sources/test_matador.rb +2 -3
  39. data/test/rbbt/sources/test_pharmagkb.rb +1 -1
  40. data/test/rbbt/sources/test_stitch.rb +3 -1
  41. metadata +47 -8
@@ -1,16 +1,7 @@
1
1
  require 'rbbt-util'
2
- require 'rbbt/util/pkg_config'
3
- require 'rbbt/util/pkg_data'
4
- require 'rbbt/util/pkg_software'
5
- require 'rbbt/util/open'
6
- require 'rbbt/util/tmpfile'
7
- require 'rbbt/util/filecache'
2
+ require 'rbbt/util/resource'
8
3
 
9
4
  module PhGx
10
- extend PKGConfig
11
- extend PKGData
12
- extend PKGSoftware
13
-
14
- self.load_cfg(%w(datadir), "datadir: #{File.join(ENV['HOME'], 'phgx', 'data')}\n")
5
+ extend Resource
15
6
  end
16
7
 
@@ -0,0 +1,11 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/cmd'
3
+
4
+ module FireDB
5
+ PhGx.add_software "FireDB" => ['', :directory]
6
+
7
+ def self.predict(accession, sequence, mutation)
8
+ CMD.cmd("perl " + File.join(PhGx.find_software("FireDB"), "firePredText.pl") + " " + [accession, accession, sequence, 10] * " ").read
9
+ end
10
+
11
+ end
@@ -0,0 +1,172 @@
1
+ require 'rbbt/util/open'
2
+ require 'rbbt/util/tsv'
3
+ require 'nokogiri'
4
+ require 'digest/md5'
5
+
6
+ module Polyphen2
7
+
8
+ URL="http://genetics.bwh.harvard.edu/cgi-bin/ggi/ggi2.cgi"
9
+ URL_BASE="http://genetics.bwh.harvard.edu/"
10
+
11
+ module Batch
12
+ OPTIONS = {
13
+ "_ggi_project" => "PPHWeb2",
14
+ "_ggi_origin" => "query",
15
+ "_ggi_batch_file" => "",
16
+ "description" => "",
17
+ "NOTIFYME" => "",
18
+ "uploaded_sequences_1" => "",
19
+ "description_of_uploaded_sequences" => "",
20
+ "MODELNAME" => "HumDiv",
21
+ "UCSCDB" => "hg19",
22
+ "SNPFILTER" => "1",
23
+ "SNPFUNC" => "m",
24
+ "_ggi_target_pipeline" => "Submit Batch",
25
+ }
26
+
27
+ REFRESH_OPTIONS = {
28
+ "sid" => "",
29
+ "_ggi_project" => "PPHWeb2",
30
+ "_ggi_origin" => "manage",
31
+ "_ggi_target_manage" => "Refresh",
32
+ }
33
+
34
+ def self.predict(query)
35
+ options = OPTIONS.merge "_ggi_batch" => query
36
+
37
+ desc = Digest::MD5.hexdigest(options.inspect)
38
+ options["description"] = desc
39
+
40
+ ddd desc
41
+
42
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
43
+
44
+ sid = doc.css('input[name=sid]').attr('value')
45
+
46
+ options = REFRESH_OPTIONS.merge "sid" => sid
47
+ finished = false
48
+
49
+ view_link = nil
50
+ while not finished do
51
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
52
+
53
+ result_table = doc.css('body > table')[1].css('table')[2]
54
+
55
+ rows = result_table.css('tr')
56
+
57
+ row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
58
+
59
+ cells = row.css('td')
60
+ if cells[2].content =~ /Error/
61
+ view_link = nil
62
+ break
63
+ end
64
+
65
+ if cells[1].content =~ /Short/
66
+ view_link = cells[1].css('a').attr('href')
67
+ break
68
+ end
69
+
70
+ sleep 1
71
+ end
72
+
73
+ return nil if view_link.nil?
74
+
75
+ tsv = TSV.new Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :key => 'acc', :merge => true
76
+
77
+ return tsv
78
+ end
79
+
80
+
81
+ end
82
+
83
+
84
+ OPTIONS = {
85
+ "ContAllHits" => 0,
86
+ "ContThresh" => 6,
87
+ "Map2Mismatch" => 0,
88
+ "MaxHitGaps" => 20,
89
+ "MinHitIde" => 0.5,
90
+ "MinHitLen" => 100,
91
+ "SortByIde" => 1,
92
+ "StructAllHits" => 0,
93
+ "_ggi_jpover" => 1,
94
+ "_ggi_origin" => "query",
95
+ "_ggi_project" => "PPHWeb2",
96
+ "_ggi_target_submit" => "submit",
97
+ "accid" => "A6NFZ4",
98
+ "description" => "",
99
+ "seqpos" => "34",
100
+ "seqres" => "",
101
+ "seqvar1" => "Y",
102
+ "seqvar2" => "D",
103
+ "Submit" => "Submit+Query",
104
+ }
105
+
106
+ REFRESH_OPTIONS = {
107
+ "sid" => "",
108
+ "_ggi_project" => "PPHWeb2",
109
+ "_ggi_origin" => "manage",
110
+ "_ggi_target_manage" => "Refresh",
111
+ }
112
+
113
+ def self.parse_mutation(mutation)
114
+ mutation.match(/([A-Z])(\d+)([A-Z])/i).values_at 1,2,3
115
+ end
116
+
117
+ def self.predict(accession, mutation)
118
+ reference, pos, substitution = parse_mutation(mutation)
119
+
120
+ options = OPTIONS.merge "accid" => accession, "seqpos" => pos, "seqvar1" => reference, "seqvar2" => substitution
121
+
122
+ desc = Digest::MD5.hexdigest(options.inspect)
123
+ options["description"] = desc
124
+
125
+ doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
126
+
127
+ sid = doc.css('input[name=sid]').attr('value')
128
+
129
+ options = REFRESH_OPTIONS.merge "sid" => sid
130
+ finished = false
131
+
132
+ view_link = nil
133
+ while not finished do
134
+ doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
135
+
136
+ result_table = doc.css('body > table')[1].css('table')[2]
137
+
138
+ rows = result_table.css('tr')
139
+
140
+ row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
141
+
142
+ cells = row.css('td')
143
+ if cells[2].content =~ /Error/
144
+ view_link = nil
145
+ break
146
+ end
147
+
148
+ if cells[1].content =~ /View/
149
+ view_link = cells[1].css('a').attr('href')
150
+ break
151
+ end
152
+
153
+ sleep 1
154
+ end
155
+
156
+ return nil if view_link.nil?
157
+
158
+
159
+ doc = Nokogiri::HTML(Open.read(URL_BASE + view_link, :nocache => true))
160
+
161
+ para = doc.css('div#HumDivConf > p').first
162
+ div_prediction = para.css('span').first.content
163
+ div_score = para.css('b').first.content
164
+
165
+ para = doc.css('div#HumVarConf > p').first
166
+ var_prediction = para.css('span').first.content
167
+ var_score = para.css('b').first.content
168
+
169
+ return [div_prediction, div_score, var_prediction, var_score]
170
+
171
+ end
172
+ end
@@ -0,0 +1,112 @@
1
+ require 'rbbt/util/open'
2
+ require 'nokogiri'
3
+ module SIFT
4
+ URL_AMINOACID="http://sift.jcvi.org/sift-bin/SIFT_pid_subst_all_submit.pl"
5
+ URL_GENOMIC="http://sift.jcvi.org/sift-bin/SIFT_feed_to_chr_coords.pl"
6
+
7
+ def self.predict_aminoacid_mutation(accession, mutations)
8
+ doc = Nokogiri::HTML(Open.read(URL_AMINOACID, :wget_options => {"--post-data" => "'GI=#{[accession, mutations].flatten * ","}&sequences_to_select=BEST&seq_identity_filter=90'"}, :nocache => false))
9
+
10
+ rows = []
11
+ doc.css('tr').each do |row|
12
+ rows << row.css('td').collect{|cell| cell.content}
13
+ end
14
+
15
+ rows.shift
16
+
17
+ if Array === mutations
18
+ rows
19
+ else
20
+ rows.first
21
+ end
22
+ end
23
+
24
+ def self.predict_aminoacid_mutation_batch(mutations)
25
+ data = case
26
+ when String === mutations
27
+ mutations
28
+ when Array === mutations
29
+ mutations.collect{|p| p * ", "} * "\n" if Array === mutations
30
+ end
31
+
32
+ doc = Nokogiri::HTML(Open.read(URL_AMINOACID, :wget_options => {"--post-data" => "'GI=#{data}&sequences_to_select=BEST&seq_identity_filter=90'"}, :nocache => false))
33
+
34
+ rows = []
35
+ doc.css('tr').each do |row|
36
+ rows << row.css('td').collect{|cell| cell.content}
37
+ end
38
+
39
+ rows.shift
40
+
41
+ if Array === mutations
42
+ rows
43
+ else
44
+ rows.first
45
+ end
46
+ end
47
+
48
+ def self.parse_genomic_mutation(mutation)
49
+ mutation.match(/(\d+):(\d+):(1|-1):([A-Z])\/([A-Z])/).values_at 1,2,3,4,5
50
+ end
51
+
52
+ def self.add_predictions(tsv)
53
+ raise "Input not TSV" unless TSV === tsv
54
+
55
+ raise "Field 'Refseq Protein ID' Not in TSV" unless tsv.fields.include? "Refseq Protein ID"
56
+
57
+ raise "Field 'Protein Mutation' Not in TSV" unless tsv.fields.include? "Protein Mutation"
58
+
59
+ data = []
60
+ tsv.through :key, ["Refseq Protein ID", "Protein Mutation"] do |key,values|
61
+ refseqs, mutations = values
62
+ mutations = mutations.reject{|mutation| mutation[0] == mutation[-1]}
63
+ next if refseqs.nil? or refseqs.compact.reject{|v| v.nil? or v.empty?}.empty? or mutations.empty?
64
+
65
+ refseqs.compact.uniq.each do |refseq|
66
+ data << [refseq, mutations]
67
+ end
68
+ end
69
+
70
+ data.sort!
71
+
72
+
73
+ predictions = {}
74
+ predict_aminoacid_mutation_batch(data).each{|values| predictions[values[0] + ":" << values[1]] = values.values_at 3,4,5,6}
75
+
76
+ refseq_field = tsv.identify_field "Refseq Protein ID"
77
+ protein_field = tsv.identify_field "Protein Mutation"
78
+
79
+ tsv.add_field "SIFT:Prediction" do |key,values|
80
+ refseqs = if refseq_field === :key
81
+ [key]
82
+ else
83
+ values[refseq_field] || []
84
+ end
85
+
86
+ next if refseqs.compact.reject{|v| v.nil? or v.empty?}.empty?
87
+
88
+ mutations = values[protein_field]
89
+
90
+ refseqs.zip(mutations).collect do |refseq,mutation|
91
+ case
92
+ when (mutation.nil? or mutation.empty?)
93
+ "No Prediction"
94
+ when mutation[0] == mutation[-1]
95
+ "TOLERATED"
96
+ when (refseq.nil? or refseq.empty?)
97
+ "No Prediction"
98
+ else
99
+ list = predictions[refseq + ":" << mutation]
100
+ if list.nil?
101
+ "No Prediction"
102
+ else
103
+ list.first
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+ tsv
110
+ end
111
+
112
+ end
@@ -0,0 +1,61 @@
1
+ require 'rbbt/util/log'
2
+ require 'rbbt/util/open'
3
+ module SNPSandGO
4
+ URL="http://snps-and-go.biocomp.unibo.it/cgi-bin/snps-and-go/runpred.cgi?uniprot=#ACCESSION#&position=#POSITION#&wild-type=#REFERENCE#&substituting=#SUBSTITUTION#"
5
+
6
+ def self.parse_mutation(mutation)
7
+ mutation.match(/([A-Z])(\d+)([A-Z])/i).values_at 1,2,3
8
+ end
9
+
10
+ def self.predict(accession, mutation)
11
+ reference, pos, substitution = parse_mutation(mutation)
12
+
13
+ url = URL.sub(/#ACCESSION#/,accession).sub(/#POSITION#/, pos).sub(/#REFERENCE#/,reference).sub(/#SUBSTITUTION#/,substitution)
14
+
15
+ res = Open.read(url)
16
+
17
+ raise "Error in prediction" unless res =~ /RESULTS/
18
+
19
+ res.match(/Position\s+WT\s+NEW\s+Effect\s+RI\n\s+\d+\s+[A-Z]\s+[A-Z]\s+(\w+)\s+(\d+)/).values_at 1,2
20
+ end
21
+
22
+ def self.add_predictions(tsv)
23
+ raise "Input not TSV" unless TSV === tsv
24
+
25
+ uniprot_field = tsv.identify_field "UniProt/SwissProt Accession"
26
+ raise "Field 'UniProt/SwissProt Accession' Not in TSV" if uniprot_field.nil?
27
+
28
+ protein_field = tsv.identify_field "Protein Mutation"
29
+ raise "Field 'Protein Mutation' Not in TSV" if protein_field.nil?
30
+
31
+
32
+ tsv.add_field "SNPs&GO:Prediction" do |key,values|
33
+ uniprots = if uniprot_field === :key
34
+ [key]
35
+ else
36
+ values[uniprot_field] || []
37
+ end
38
+
39
+ mutations = values[protein_field]
40
+
41
+ uniprots.zip(mutations).collect{|uniprot,mutation|
42
+ case
43
+ when mutation.nil?
44
+ "No Prediction"
45
+ when mutation[0] == mutation[-1]
46
+ "Neutral"
47
+ when (uniprot.nil? or uniprot.empty?)
48
+ "No Prediction"
49
+ else
50
+ begin
51
+ SNPSandGO.predict(uniprot, mutation).first
52
+ rescue
53
+ "No Prediction"
54
+ end
55
+ end
56
+ }
57
+ end
58
+
59
+ tsv
60
+ end
61
+ end
@@ -0,0 +1,11 @@
1
+ require 'phgx'
2
+
3
+ module Biogrid
4
+ extend Resource
5
+ data_module PhGx
6
+
7
+ ["Hsa", "Rno", "Sce"].each do |organism|
8
+ module_eval "#{ organism } = with_key '#{organism}'"
9
+ end
10
+
11
+ end
@@ -1,12 +1,7 @@
1
1
  require 'phgx'
2
- require 'rbbt/util/data_module'
3
2
 
4
3
  module Cancer
5
- PhGx.add_datafiles :anais_annotations => ['Cancer', 'Cancer/anais-annotations.txt'],
6
- :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']
7
-
8
- PKG = PhGx
9
- extend DataModule
4
+ extend Resource
5
+ relative_to Rbbt, "share/Cancer"
6
+ @namespace = "Cancer"
10
7
  end
11
-
12
- if __FILE__ == $0 then NCI.all end
@@ -0,0 +1,6 @@
1
+ require 'phgx'
2
+
3
+ module DBSNP
4
+ extend Resource
5
+ data_module PhGx
6
+ end
@@ -0,0 +1,6 @@
1
+ require 'phgx'
2
+
3
+ module HPRD
4
+ extend Resource
5
+ data_module PhGx
6
+ end
@@ -1,9 +1,6 @@
1
- require 'rbbt/util/data_module'
2
1
  require 'phgx'
3
2
 
4
3
  module KEGG
5
- PKG = PhGx
6
- extend DataModule
4
+ extend Resource
5
+ data_module(PhGx)
7
6
  end
8
-
9
- if __FILE__ == $0 then KEGG.all end
@@ -1,9 +1,6 @@
1
- require 'rbbt/util/data_module'
2
1
  require 'phgx'
3
2
 
4
3
  module Matador
5
- PKG = PhGx
6
- extend DataModule
4
+ extend Resource
5
+ data_module PhGx
7
6
  end
8
-
9
- if __FILE__ == $0 then Matador.all end