rbbt-phgx 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. data/lib/phgx.rb +2 -11
  2. data/lib/rbbt/mutation/fireDB.rb +11 -0
  3. data/lib/rbbt/mutation/polyphen.rb +172 -0
  4. data/lib/rbbt/mutation/sift.rb +112 -0
  5. data/lib/rbbt/mutation/snps_and_go.rb +61 -0
  6. data/lib/rbbt/sources/biogrid.rb +11 -0
  7. data/lib/rbbt/sources/cancer.rb +3 -8
  8. data/lib/rbbt/sources/dbsnp.rb +6 -0
  9. data/lib/rbbt/sources/hprd.rb +6 -0
  10. data/lib/rbbt/sources/kegg.rb +2 -5
  11. data/lib/rbbt/sources/matador.rb +2 -5
  12. data/lib/rbbt/sources/nci.rb +2 -5
  13. data/lib/rbbt/sources/pharmagkb.rb +2 -5
  14. data/lib/rbbt/sources/pina.rb +6 -0
  15. data/lib/rbbt/sources/reactome.rb +6 -0
  16. data/lib/rbbt/sources/sift.rb +5 -0
  17. data/lib/rbbt/sources/stitch.rb +2 -5
  18. data/lib/rbbt/sources/string.rb +2 -5
  19. data/share/Cancer/anais_annotations +7949 -0
  20. data/share/Cancer/anais_interactions +3402 -0
  21. data/share/Cancer/cancer_genes.tsv +428 -0
  22. data/share/install/Biogrid/Rakefile +20 -0
  23. data/share/install/DBSNP/Rakefile +50 -0
  24. data/share/install/HPRD/Rakefile +15 -0
  25. data/share/install/KEGG/Rakefile +3 -3
  26. data/share/install/Matador/Rakefile +2 -2
  27. data/share/install/NCI/Rakefile +2 -2
  28. data/share/install/PharmaGKB/Rakefile +17 -14
  29. data/share/install/Pina/Rakefile +16 -0
  30. data/share/install/Reactome/Rakefile +36 -0
  31. data/share/install/STITCH/Rakefile +5 -5
  32. data/share/install/STRING/Rakefile +2 -2
  33. data/test/rbbt/mutation/test_fireDB.rb +14 -0
  34. data/test/rbbt/mutation/test_polyphen.rb +20 -0
  35. data/test/rbbt/mutation/test_sift.rb +23 -0
  36. data/test/rbbt/mutation/test_snps_and_go.rb +32 -0
  37. data/test/rbbt/sources/test_cancer.rb +1 -1
  38. data/test/rbbt/sources/test_matador.rb +2 -3
  39. data/test/rbbt/sources/test_pharmagkb.rb +1 -1
  40. data/test/rbbt/sources/test_stitch.rb +3 -1
  41. metadata +47 -8
@@ -1,16 +1,7 @@
1
1
  require 'rbbt-util'
2
- require 'rbbt/util/pkg_config'
3
- require 'rbbt/util/pkg_data'
4
- require 'rbbt/util/pkg_software'
5
- require 'rbbt/util/open'
6
- require 'rbbt/util/tmpfile'
7
- require 'rbbt/util/filecache'
2
+ require 'rbbt/util/resource'
8
3
 
9
4
  module PhGx
10
- extend PKGConfig
11
- extend PKGData
12
- extend PKGSoftware
13
-
14
- self.load_cfg(%w(datadir), "datadir: #{File.join(ENV['HOME'], 'phgx', 'data')}\n")
5
+ extend Resource
15
6
  end
16
7
 
@@ -0,0 +1,11 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/cmd'
3
+
4
+ module FireDB
5
+ PhGx.add_software "FireDB" => ['', :directory]
6
+
7
+ def self.predict(accession, sequence, mutation)
8
+ CMD.cmd("perl " + File.join(PhGx.find_software("FireDB"), "firePredText.pl") + " " + [accession, accession, sequence, 10] * " ").read
9
+ end
10
+
11
+ end
@@ -0,0 +1,172 @@
1
+ require 'rbbt/util/open'
2
+ require 'rbbt/util/tsv'
3
+ require 'nokogiri'
4
+ require 'digest/md5'
5
+
6
+ module Polyphen2
7
+
8
+ URL="http://genetics.bwh.harvard.edu/cgi-bin/ggi/ggi2.cgi"
9
+ URL_BASE="http://genetics.bwh.harvard.edu/"
10
+
11
+ module Batch
12
+ OPTIONS = {
13
+ "_ggi_project" => "PPHWeb2",
14
+ "_ggi_origin" => "query",
15
+ "_ggi_batch_file" => "",
16
+ "description" => "",
17
+ "NOTIFYME" => "",
18
+ "uploaded_sequences_1" => "",
19
+ "description_of_uploaded_sequences" => "",
20
+ "MODELNAME" => "HumDiv",
21
+ "UCSCDB" => "hg19",
22
+ "SNPFILTER" => "1",
23
+ "SNPFUNC" => "m",
24
+ "_ggi_target_pipeline" => "Submit Batch",
25
+ }
26
+
27
+ REFRESH_OPTIONS = {
28
+ "sid" => "",
29
+ "_ggi_project" => "PPHWeb2",
30
+ "_ggi_origin" => "manage",
31
+ "_ggi_target_manage" => "Refresh",
32
+ }
33
+
34
+ def self.predict(query)
35
+ options = OPTIONS.merge "_ggi_batch" => query
36
+
37
+ desc = Digest::MD5.hexdigest(options.inspect)
38
+ options["description"] = desc
39
+
40
+ ddd desc
41
+
42
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
43
+
44
+ sid = doc.css('input[name=sid]').attr('value')
45
+
46
+ options = REFRESH_OPTIONS.merge "sid" => sid
47
+ finished = false
48
+
49
+ view_link = nil
50
+ while not finished do
51
+ doc = Nokogiri::HTML(Open.read(Polyphen2::URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
52
+
53
+ result_table = doc.css('body > table')[1].css('table')[2]
54
+
55
+ rows = result_table.css('tr')
56
+
57
+ row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
58
+
59
+ cells = row.css('td')
60
+ if cells[2].content =~ /Error/
61
+ view_link = nil
62
+ break
63
+ end
64
+
65
+ if cells[1].content =~ /Short/
66
+ view_link = cells[1].css('a').attr('href')
67
+ break
68
+ end
69
+
70
+ sleep 1
71
+ end
72
+
73
+ return nil if view_link.nil?
74
+
75
+ tsv = TSV.new Open.open(Polyphen2::URL_BASE + view_link, :nocache => true), :double, :key => 'acc', :merge => true
76
+
77
+ return tsv
78
+ end
79
+
80
+
81
+ end
82
+
83
+
84
+ OPTIONS = {
85
+ "ContAllHits" => 0,
86
+ "ContThresh" => 6,
87
+ "Map2Mismatch" => 0,
88
+ "MaxHitGaps" => 20,
89
+ "MinHitIde" => 0.5,
90
+ "MinHitLen" => 100,
91
+ "SortByIde" => 1,
92
+ "StructAllHits" => 0,
93
+ "_ggi_jpover" => 1,
94
+ "_ggi_origin" => "query",
95
+ "_ggi_project" => "PPHWeb2",
96
+ "_ggi_target_submit" => "submit",
97
+ "accid" => "A6NFZ4",
98
+ "description" => "",
99
+ "seqpos" => "34",
100
+ "seqres" => "",
101
+ "seqvar1" => "Y",
102
+ "seqvar2" => "D",
103
+ "Submit" => "Submit+Query",
104
+ }
105
+
106
+ REFRESH_OPTIONS = {
107
+ "sid" => "",
108
+ "_ggi_project" => "PPHWeb2",
109
+ "_ggi_origin" => "manage",
110
+ "_ggi_target_manage" => "Refresh",
111
+ }
112
+
113
+ def self.parse_mutation(mutation)
114
+ mutation.match(/([A-Z])(\d+)([A-Z])/i).values_at 1,2,3
115
+ end
116
+
117
+ def self.predict(accession, mutation)
118
+ reference, pos, substitution = parse_mutation(mutation)
119
+
120
+ options = OPTIONS.merge "accid" => accession, "seqpos" => pos, "seqvar1" => reference, "seqvar2" => substitution
121
+
122
+ desc = Digest::MD5.hexdigest(options.inspect)
123
+ options["description"] = desc
124
+
125
+ doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
126
+
127
+ sid = doc.css('input[name=sid]').attr('value')
128
+
129
+ options = REFRESH_OPTIONS.merge "sid" => sid
130
+ finished = false
131
+
132
+ view_link = nil
133
+ while not finished do
134
+ doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-data" => "'#{options.collect{|k,v| [k,v] * "="} * "&"}'"}, :nocache => true))
135
+
136
+ result_table = doc.css('body > table')[1].css('table')[2]
137
+
138
+ rows = result_table.css('tr')
139
+
140
+ row = rows.select{|row| row.css('td').length == 6}.select{|row| row.css('td').last.content.strip == desc}.first
141
+
142
+ cells = row.css('td')
143
+ if cells[2].content =~ /Error/
144
+ view_link = nil
145
+ break
146
+ end
147
+
148
+ if cells[1].content =~ /View/
149
+ view_link = cells[1].css('a').attr('href')
150
+ break
151
+ end
152
+
153
+ sleep 1
154
+ end
155
+
156
+ return nil if view_link.nil?
157
+
158
+
159
+ doc = Nokogiri::HTML(Open.read(URL_BASE + view_link, :nocache => true))
160
+
161
+ para = doc.css('div#HumDivConf > p').first
162
+ div_prediction = para.css('span').first.content
163
+ div_score = para.css('b').first.content
164
+
165
+ para = doc.css('div#HumVarConf > p').first
166
+ var_prediction = para.css('span').first.content
167
+ var_score = para.css('b').first.content
168
+
169
+ return [div_prediction, div_score, var_prediction, var_score]
170
+
171
+ end
172
+ end
@@ -0,0 +1,112 @@
1
+ require 'rbbt/util/open'
2
+ require 'nokogiri'
3
+ module SIFT
4
+ URL_AMINOACID="http://sift.jcvi.org/sift-bin/SIFT_pid_subst_all_submit.pl"
5
+ URL_GENOMIC="http://sift.jcvi.org/sift-bin/SIFT_feed_to_chr_coords.pl"
6
+
7
+ def self.predict_aminoacid_mutation(accession, mutations)
8
+ doc = Nokogiri::HTML(Open.read(URL_AMINOACID, :wget_options => {"--post-data" => "'GI=#{[accession, mutations].flatten * ","}&sequences_to_select=BEST&seq_identity_filter=90'"}, :nocache => false))
9
+
10
+ rows = []
11
+ doc.css('tr').each do |row|
12
+ rows << row.css('td').collect{|cell| cell.content}
13
+ end
14
+
15
+ rows.shift
16
+
17
+ if Array === mutations
18
+ rows
19
+ else
20
+ rows.first
21
+ end
22
+ end
23
+
24
+ def self.predict_aminoacid_mutation_batch(mutations)
25
+ data = case
26
+ when String === mutations
27
+ mutations
28
+ when Array === mutations
29
+ mutations.collect{|p| p * ", "} * "\n" if Array === mutations
30
+ end
31
+
32
+ doc = Nokogiri::HTML(Open.read(URL_AMINOACID, :wget_options => {"--post-data" => "'GI=#{data}&sequences_to_select=BEST&seq_identity_filter=90'"}, :nocache => false))
33
+
34
+ rows = []
35
+ doc.css('tr').each do |row|
36
+ rows << row.css('td').collect{|cell| cell.content}
37
+ end
38
+
39
+ rows.shift
40
+
41
+ if Array === mutations
42
+ rows
43
+ else
44
+ rows.first
45
+ end
46
+ end
47
+
48
+ def self.parse_genomic_mutation(mutation)
49
+ mutation.match(/(\d+):(\d+):(1|-1):([A-Z])\/([A-Z])/).values_at 1,2,3,4,5
50
+ end
51
+
52
+ def self.add_predictions(tsv)
53
+ raise "Input not TSV" unless TSV === tsv
54
+
55
+ raise "Field 'Refseq Protein ID' Not in TSV" unless tsv.fields.include? "Refseq Protein ID"
56
+
57
+ raise "Field 'Protein Mutation' Not in TSV" unless tsv.fields.include? "Protein Mutation"
58
+
59
+ data = []
60
+ tsv.through :key, ["Refseq Protein ID", "Protein Mutation"] do |key,values|
61
+ refseqs, mutations = values
62
+ mutations = mutations.reject{|mutation| mutation[0] == mutation[-1]}
63
+ next if refseqs.nil? or refseqs.compact.reject{|v| v.nil? or v.empty?}.empty? or mutations.empty?
64
+
65
+ refseqs.compact.uniq.each do |refseq|
66
+ data << [refseq, mutations]
67
+ end
68
+ end
69
+
70
+ data.sort!
71
+
72
+
73
+ predictions = {}
74
+ predict_aminoacid_mutation_batch(data).each{|values| predictions[values[0] + ":" << values[1]] = values.values_at 3,4,5,6}
75
+
76
+ refseq_field = tsv.identify_field "Refseq Protein ID"
77
+ protein_field = tsv.identify_field "Protein Mutation"
78
+
79
+ tsv.add_field "SIFT:Prediction" do |key,values|
80
+ refseqs = if refseq_field === :key
81
+ [key]
82
+ else
83
+ values[refseq_field] || []
84
+ end
85
+
86
+ next if refseqs.compact.reject{|v| v.nil? or v.empty?}.empty?
87
+
88
+ mutations = values[protein_field]
89
+
90
+ refseqs.zip(mutations).collect do |refseq,mutation|
91
+ case
92
+ when (mutation.nil? or mutation.empty?)
93
+ "No Prediction"
94
+ when mutation[0] == mutation[-1]
95
+ "TOLERATED"
96
+ when (refseq.nil? or refseq.empty?)
97
+ "No Prediction"
98
+ else
99
+ list = predictions[refseq + ":" << mutation]
100
+ if list.nil?
101
+ "No Prediction"
102
+ else
103
+ list.first
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+ tsv
110
+ end
111
+
112
+ end
@@ -0,0 +1,61 @@
1
+ require 'rbbt/util/log'
2
+ require 'rbbt/util/open'
3
+ module SNPSandGO
4
+ URL="http://snps-and-go.biocomp.unibo.it/cgi-bin/snps-and-go/runpred.cgi?uniprot=#ACCESSION#&position=#POSITION#&wild-type=#REFERENCE#&substituting=#SUBSTITUTION#"
5
+
6
+ def self.parse_mutation(mutation)
7
+ mutation.match(/([A-Z])(\d+)([A-Z])/i).values_at 1,2,3
8
+ end
9
+
10
+ def self.predict(accession, mutation)
11
+ reference, pos, substitution = parse_mutation(mutation)
12
+
13
+ url = URL.sub(/#ACCESSION#/,accession).sub(/#POSITION#/, pos).sub(/#REFERENCE#/,reference).sub(/#SUBSTITUTION#/,substitution)
14
+
15
+ res = Open.read(url)
16
+
17
+ raise "Error in prediction" unless res =~ /RESULTS/
18
+
19
+ res.match(/Position\s+WT\s+NEW\s+Effect\s+RI\n\s+\d+\s+[A-Z]\s+[A-Z]\s+(\w+)\s+(\d+)/).values_at 1,2
20
+ end
21
+
22
+ def self.add_predictions(tsv)
23
+ raise "Input not TSV" unless TSV === tsv
24
+
25
+ uniprot_field = tsv.identify_field "UniProt/SwissProt Accession"
26
+ raise "Field 'UniProt/SwissProt Accession' Not in TSV" if uniprot_field.nil?
27
+
28
+ protein_field = tsv.identify_field "Protein Mutation"
29
+ raise "Field 'Protein Mutation' Not in TSV" if protein_field.nil?
30
+
31
+
32
+ tsv.add_field "SNPs&GO:Prediction" do |key,values|
33
+ uniprots = if uniprot_field === :key
34
+ [key]
35
+ else
36
+ values[uniprot_field] || []
37
+ end
38
+
39
+ mutations = values[protein_field]
40
+
41
+ uniprots.zip(mutations).collect{|uniprot,mutation|
42
+ case
43
+ when mutation.nil?
44
+ "No Prediction"
45
+ when mutation[0] == mutation[-1]
46
+ "Neutral"
47
+ when (uniprot.nil? or uniprot.empty?)
48
+ "No Prediction"
49
+ else
50
+ begin
51
+ SNPSandGO.predict(uniprot, mutation).first
52
+ rescue
53
+ "No Prediction"
54
+ end
55
+ end
56
+ }
57
+ end
58
+
59
+ tsv
60
+ end
61
+ end
@@ -0,0 +1,11 @@
1
+ require 'phgx'
2
+
3
+ module Biogrid
4
+ extend Resource
5
+ data_module PhGx
6
+
7
+ ["Hsa", "Rno", "Sce"].each do |organism|
8
+ module_eval "#{ organism } = with_key '#{organism}'"
9
+ end
10
+
11
+ end
@@ -1,12 +1,7 @@
1
1
  require 'phgx'
2
- require 'rbbt/util/data_module'
3
2
 
4
3
  module Cancer
5
- PhGx.add_datafiles :anais_annotations => ['Cancer', 'Cancer/anais-annotations.txt'],
6
- :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']
7
-
8
- PKG = PhGx
9
- extend DataModule
4
+ extend Resource
5
+ relative_to Rbbt, "share/Cancer"
6
+ @namespace = "Cancer"
10
7
  end
11
-
12
- if __FILE__ == $0 then NCI.all end
@@ -0,0 +1,6 @@
1
+ require 'phgx'
2
+
3
+ module DBSNP
4
+ extend Resource
5
+ data_module PhGx
6
+ end
@@ -0,0 +1,6 @@
1
+ require 'phgx'
2
+
3
+ module HPRD
4
+ extend Resource
5
+ data_module PhGx
6
+ end
@@ -1,9 +1,6 @@
1
- require 'rbbt/util/data_module'
2
1
  require 'phgx'
3
2
 
4
3
  module KEGG
5
- PKG = PhGx
6
- extend DataModule
4
+ extend Resource
5
+ data_module(PhGx)
7
6
  end
8
-
9
- if __FILE__ == $0 then KEGG.all end
@@ -1,9 +1,6 @@
1
- require 'rbbt/util/data_module'
2
1
  require 'phgx'
3
2
 
4
3
  module Matador
5
- PKG = PhGx
6
- extend DataModule
4
+ extend Resource
5
+ data_module PhGx
7
6
  end
8
-
9
- if __FILE__ == $0 then Matador.all end