rbbt-phgx 0.3.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -28,13 +28,23 @@ module MutationAssessor
28
28
 
29
29
  Log.debug "Querying Mutation Assessor for: #{vars.split(/\n/).length}"
30
30
  tries = 0
31
+ nocache = false
31
32
  begin
32
33
  doc = nil
33
34
  TmpFile.with_file(post_data) do |post_file|
34
- doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => false))
35
+ Log.medium "Updating cache:" if nocache == :update
36
+ doc = Nokogiri::HTML(Open.read(URL, :wget_options => {"--post-file" => post_file }, :nocache => nocache))
35
37
  end
36
38
 
37
39
  textareas = doc.css('textarea')
40
+
41
+ if textareas.empty?
42
+ puts "No text area"
43
+ puts doc
44
+ puts
45
+ raise NotDone, "No text aread found in response HTML"
46
+ end
47
+
38
48
  result = textareas.last.content
39
49
 
40
50
  if result =~ /Cannot parse variant/
@@ -44,14 +54,16 @@ module MutationAssessor
44
54
  raise "Cannot parse variants. Variants in file #{ variants }"
45
55
  end
46
56
 
47
- raise NotDone, "Not done" if result =~ /\t.sent\t./
57
+ raise NotDone, "Not done" if result =~ /\t\[sent\]\t/
48
58
  rescue NotDone
49
- Log.debug "Mutation Assessor not done, waiting:"
50
- Log.debug result
51
-
52
- sleep 30
53
59
  tries += 1
60
+ nocache = :update
61
+
62
+ Log.medium "Mutation Assessor not done, waiting:"
63
+ sleep 30
64
+
54
65
  if tries < 10
66
+ Log.medium "Retrying mutation assessor"
55
67
  retry
56
68
  else
57
69
  raise "Error processing Mutation Assessor response"
@@ -80,7 +92,7 @@ module MutationAssessor
80
92
  def self.chunked_predict(mutations)
81
93
  chunks = mutations.length.to_f / 1000
82
94
  chunks = chunks.ceil
83
- Misc.divide(mutations.sort, chunks).inject(nil) do |acc, list|
95
+ Misc.divide(mutations.sort_by{|m| m * ":"}, chunks).inject(nil) do |acc, list|
84
96
  if acc.nil?
85
97
  acc = predict(list)
86
98
  else
@@ -145,14 +157,9 @@ module MutationAssessor
145
157
 
146
158
  next if uni_accs.compact.reject{|v| v.nil? or v.empty?}.empty?
147
159
 
148
- ddd uni_accs
149
-
150
160
  mutations = values[protein_field]
151
- ddd mutations
152
161
 
153
162
  uni_accs.zip(mutations).collect do |uni_acc,mutation|
154
- ddd uni_acc
155
- ddd mutation
156
163
  res = case
157
164
  when (mutation.nil? or mutation.empty?)
158
165
  "No Prediction"
@@ -6,7 +6,8 @@ module SIFT
6
6
  URL_ENSP="http://sift.jcvi.org/sift-bin/retrieve_enst.pl"
7
7
 
8
8
  def self.predict(mutations)
9
- doc = Nokogiri::HTML(Open.read(URL_ENSP, :wget_options => {"--post-data" => "'ENSP=#{mutations.collect{|p| p * ","} * "\n"}'"}, :nocache => false))
9
+ data_str = mutations.collect{|mut| mut.sub(':', ',')}.uniq * "\n"
10
+ doc = Nokogiri::HTML(Open.read(URL_ENSP, :wget_options => {"--post-data=" => "'ENSP=#{data_str}'"}))
10
11
 
11
12
  rows = []
12
13
  doc.css('tr').each do |row|
@@ -15,10 +16,20 @@ module SIFT
15
16
 
16
17
  rows.shift
17
18
 
18
- if Array === mutations
19
- rows
19
+ if rows.any?
20
+ TSV.open StringIO.new(rows.collect{|row| row.collect{|v| v.sub(/(ENSP\d+),/,'\1:')} * "\t"} * "\n"), :list,
21
+ :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"]
20
22
  else
21
- rows.first
23
+ TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
24
+ end
25
+ end
26
+
27
+ def self.chunked_predict(mutations)
28
+ chunks = mutations.length.to_f / 100
29
+ chunks = chunks.ceil
30
+ tsv = TSV.setup({}, :type => :list, :key_field => "Mutated Isoform", :fields =>["Ensembl Protein ID", "Amino Acid Position", "Wildtype Amino Acid", "Mutant Amino Acid", "Prediction", "Score 1", "Score 2", "Score 3"])
31
+ Misc.divide(mutations.uniq.sort, chunks).inject(tsv) do |acc, list|
32
+ acc = TSV.setup(acc.merge(predict(list)))
22
33
  end
23
34
  end
24
35
 
@@ -6,6 +6,7 @@ module KEGG
6
6
  self.pkgdir = "phgx"
7
7
  self.subdir = "share/kegg"
8
8
 
9
+
9
10
  KEGG.claim KEGG.root.find, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
10
11
 
11
12
  def self.names
@@ -13,16 +14,20 @@ module KEGG
13
14
  end
14
15
 
15
16
  def self.descriptions
16
- @@descriptions ||= KEGG.pathways.tsv :fields => ["Pathway Description"], :persist => true, :type => :single
17
+ @@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single).tap{|o| o.unnamed = true}
17
18
  end
18
19
 
19
20
 
21
+ def self.index2genes
22
+ @@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true).tap{|o| o.unnamed = true}
23
+ end
24
+
20
25
  def self.index2ens
21
- @@index2ens ||= KEGG.identifiers.index :persist => true
26
+ @@index2ens ||= KEGG.identifiers.index(:persist => true).tap{|o| o.unnamed = true}
22
27
  end
23
28
 
24
29
  def self.index2kegg
25
- @@index2kegg ||= KEGG.identifiers.index :target => "KEGG Gene ID", :persist => true
30
+ @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true).tap{|o| o.unnamed = true}
26
31
  end
27
32
 
28
33
  def self.id2name(id)
@@ -34,24 +39,98 @@ module KEGG
34
39
  end
35
40
  end
36
41
 
37
- module Gene
42
+ if defined? Entity
43
+
44
+ module KeggPathway
45
+ extend Entity
46
+ self.format = "KEGG Pathway ID"
47
+
48
+ self.annotation :organism
49
+
50
+ def self.filter(query, field = nil, options = nil, entity = nil)
51
+ return true if query == entity
38
52
 
39
- def to_kegg
40
- if Array === self
41
- Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
42
- else
43
- Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
53
+ return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
54
+
55
+ false
56
+ end
57
+
58
+ property :name => :single2array do
59
+ return nil if self.nil?
60
+ name = KEGG.id2name(self)
61
+ name.sub(/ - Homo.*/,'') unless name.nil?
62
+ end
63
+
64
+ property :description => :single2array do
65
+ KEGG.description(self)
44
66
  end
45
- end
46
67
 
47
- def from_kegg
48
- if Array === self
49
- Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism)
50
- else
51
- Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism)
68
+ property :genes => :array2single do |*args|
69
+ organism = args.first || self.organism
70
+ @genes ||= KEGG.index2genes.values_at(*self).
71
+ each{|pth| pth.organism = organism if pth.respond_to? :organism }
52
72
  end
53
73
  end
54
74
 
75
+ if defined? Gene and Entity === Gene
76
+ module Gene
77
+ self.format = "KEGG Gene ID"
78
+
79
+ def to_kegg
80
+ return self if format == "KEGG Gene ID"
81
+ if Array === self
82
+ Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
83
+ else
84
+ Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
85
+ end
86
+ end
87
+
88
+ def _from_kegg
89
+ return self.clean_annotations unless format == "KEGG Gene ID"
90
+ if Array === self
91
+ KEGG.index2ens.values_at(*self)
92
+ else
93
+ KEGG.index2ens[self]
94
+ end
95
+ end
96
+
97
+ def from_kegg
98
+ return self unless format == "KEGG Gene ID"
99
+ Gene.setup(_from_kegg, "Ensembl Gene ID", organism)
100
+ end
101
+
102
+ property :_to => :array2single do |new_format|
103
+ return self if format == new_format
104
+ list = self._from_kegg
105
+
106
+ tsv = Translation.job(:tsv_translate, "", :organism => organism, :genes => list, :format => new_format).exec.tap{|o| o.unnamed = true}
55
107
 
108
+ tsv.values_at(*list)
109
+ end
56
110
 
111
+ property :to! => :array2single do |new_format|
112
+ return self if format == new_format
113
+
114
+ new = _to(new_format)
115
+ new.each_with_index do |n,i|
116
+ c = self.annotated_array_clean_get_brackets(i)
117
+ if c.nil? or n.nil?
118
+ self[i] = nil
119
+ else
120
+ c.replace n
121
+ end
122
+ end
123
+ end
124
+
125
+ property :to => :array2single do |new_format|
126
+ return self if format == new_format
127
+ Gene.setup(_to(new_format), new_format, organism)
128
+ end
129
+
130
+ property :kegg_pathways => :array2single do
131
+ @kegg_pathways ||= KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true).values_at(*self.to_kegg).
132
+ each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
133
+ end
134
+ end
135
+ end
57
136
  end
@@ -6,5 +6,4 @@ module Matador
6
6
  self.subdir = "share/matador"
7
7
 
8
8
  Matador.claim Matador.root.find, :rake, Rbbt.share.install.Matador.Rakefile.find(:lib)
9
-
10
9
  end
@@ -6,5 +6,4 @@ module STRING
6
6
  self.subdir = "share/string"
7
7
 
8
8
  STRING.claim STRING.root.find, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
9
-
10
9
  end
@@ -1,4 +1,4 @@
1
- #Symbol Name GeneID Chr Chr Band Cancer Somatic Mut Cancer Germline Mut Tumour Types (Somatic Mutations) Tumour Types (Germline Mutations) Cancer Syndrome Tissue Type Cancer Molecular Genetics Mutation Type Translocation Partner Other Germline Mut Other Syndrome/Disease
1
+ #Associated Gene Name Name GeneID Chr Chr Band Cancer Somatic Mut Cancer Germline Mut Tumour Types (Somatic Mutations) Tumour Types (Germline Mutations) Cancer Syndrome Tissue Type Cancer Molecular Genetics Mutation Type Translocation Partner Other Germline Mut Other Syndrome/Disease
2
2
  ABL1 v-abl Abelson murine leukemia viral oncogene homolog 1 25 9 9q34.1 yes CML, ALL, T-ALL L Dom T, Mis BCR, ETV6, NUP214
3
3
  ABL2 v-abl Abelson murine leukemia viral oncogene homolog 2 27 1 1q24-q25 yes AML L Dom T ETV6
4
4
  ACSL3 acyl-CoA synthetase long-chain family member 3 2181 2 2q36 yes prostate E Dom T ETV1
@@ -2,9 +2,8 @@ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
2
 
3
3
  define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"
4
4
 
5
-
6
5
  process_tsv :protein_drug, 'matador',
7
- :key => 3,
6
+ :key_field => 3,
8
7
  :fix => lambda{|l| l.sub(/9606./,'')},
9
8
  :fields => [1,0,7,8,9,10,11,12],
10
9
  :header_hash => "",
@@ -3,16 +3,15 @@ require 'rbbt/mutation/mutation_assessor'
3
3
 
4
4
  class TestMutationAssessor < Test::Unit::TestCase
5
5
 
6
- def _test_predict_aminoacid_mutation
6
+ def test_predict_aminoacid_mutation
7
7
  mutations = {
8
8
  "EGFR_HUMAN" => %w(R521K)
9
9
  }
10
10
 
11
- puts MutationAssessor.predict(mutations)
12
11
  assert_equal 1, MutationAssessor.predict(mutations).length
13
12
  end
14
13
 
15
- def _test_predict_aminoacid_mutation_tsv
14
+ def test_predict_aminoacid_mutation_tsv
16
15
  tsv = TSV.setup({"EGFR_HUMAN" => [%w(R521K)]}, :key_field => "UniProt/SwissProt ID", :fields => ["Protein Mutation"], :type => :double)
17
16
 
18
17
  assert_equal "neutral", MutationAssessor.add_predictions(tsv).slice("MutationAssessor:Prediction").values.first.flatten.first
@@ -26,7 +25,7 @@ class TestMutationAssessor < Test::Unit::TestCase
26
25
  "P53_HUMAN" => %w(R21K),
27
26
  }
28
27
 
29
- puts MutationAssessor.chunked_predict(mutations)
28
+ assert(MutationAssessor.chunked_predict(mutations).include? "EGFR_HUMAN R521K")
30
29
  end
31
30
 
32
31
 
@@ -16,10 +16,18 @@ class TestSIFT < Test::Unit::TestCase
16
16
 
17
17
  assert_equal "TOLERATED", SIFT.predict_aminoacid_mutation_batch( [[accession, mutation]]).first[3]
18
18
  end
19
- def test_predict_aminoacid_mutation_batch
19
+
20
+ def test_predict
20
21
  ensp = "ENSP00000224605"
21
22
  mutation = "A63T"
22
- assert_equal "TOLERATED", SIFT.predict( [[ensp, mutation]]).first[5]
23
+ assert_equal "TOLERATED", SIFT.predict( [[ensp, mutation] * ":"]).values.first["Prediction"]
23
24
  end
24
25
 
26
+ def test_chunked_predict
27
+ ensp = "ENSP00000224605"
28
+ mutation = "A63T"
29
+ assert_equal "TOLERATED", SIFT.chunked_predict( [[ensp, mutation] * ":"] * 2000).values.first["Prediction"]
30
+ end
31
+
32
+
25
33
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-phgx
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
- - 0
8
- - 3
9
7
  - 1
10
- version: 0.3.1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-10-03 00:00:00 +02:00
18
+ date: 2012-01-13 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -56,11 +56,9 @@ files:
56
56
  - lib/rbbt/sources/hprd.rb
57
57
  - lib/rbbt/sources/kegg.rb
58
58
  - lib/rbbt/sources/matador.rb
59
- - lib/rbbt/sources/nci.rb
60
59
  - lib/rbbt/sources/pharmagkb.rb
61
60
  - lib/rbbt/sources/pina.rb
62
61
  - lib/rbbt/sources/reactome.rb
63
- - lib/rbbt/sources/sift.rb
64
62
  - lib/rbbt/sources/stitch.rb
65
63
  - lib/rbbt/sources/string.rb
66
64
  - share/Cancer/anais_annotations
@@ -1,6 +0,0 @@
1
- require 'phgx'
2
-
3
- module NCI
4
- extend Resource
5
- data_module PhGx
6
- end
@@ -1,5 +0,0 @@
1
- require 'phgx'
2
-
3
- module SIFT
4
- #PhGx.software.opt.SIFT.define_as_install Rbbt.share.install.software.SIFT.find
5
- end