rbbt-phgx 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/LICENSE +1 -1
- data/lib/rbbt/sources/kegg.rb +6 -14
- data/lib/rbbt/sources/string.rb +0 -1
- metadata +5 -11
- data/lib/rbbt/mutation/oncodriveFM.rb +0 -146
- data/share/install/software/OncodriveFM +0 -13
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZTQ0NDhjZjM0ODlmNmY4MTJjNzk5NmM4Yjg0ZjExN2Y4NGJkMmUwYQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjY5NTQyNWE0Njk4OGZmNjUwYWI2YzM5MzBkODdhMzcyMDNjZDM5MQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzQ3NDlmNzA2OWE5MmE3NGUzYTI5MTIyNzViYzQ0NGQxZDViZjI3ZTE4YmQ1
|
10
|
+
ODFlZTk3OWU1NWFhN2U0NWE5YzI3MGE0YTM1ZjFkZGIzYzVkNjI3MDBjMzhl
|
11
|
+
MjA4MmNkNjczN2Q0NzdhZjY1YWZiOWM2OTFkNTVjMDU2M2MwZGE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OGY4ZjY0YjViYjc2MGU1MTM0MGFmODg0MWQ1YjQwOTJjOTk3MmIxMWUyNjE0
|
14
|
+
NTNkZTNlNTI2YzY0MGFiOWYyMzFhMWVhYjY2MmNhMTJiYzI2NWQ2YjE1YjIx
|
15
|
+
Yzk3NmM1MmFkMDk2MGNkNmVmYzZkMGExZjVkZDVmZmVjZDQ5ZTY=
|
data/LICENSE
CHANGED
data/lib/rbbt/sources/kegg.rb
CHANGED
@@ -65,7 +65,6 @@ if defined? Entity
|
|
65
65
|
name = KEGG.id2name(self)
|
66
66
|
name.sub(/ - Homo.*/,'') unless name.nil?
|
67
67
|
end
|
68
|
-
persist :name
|
69
68
|
|
70
69
|
property :description => :single2array do
|
71
70
|
KEGG.description(self)
|
@@ -76,7 +75,6 @@ if defined? Entity
|
|
76
75
|
KEGG.index2genes.values_at(*self).
|
77
76
|
each{|gene| gene.organism = organism if gene.respond_to? :organism }
|
78
77
|
end
|
79
|
-
persist :genes
|
80
78
|
end
|
81
79
|
|
82
80
|
if defined? Gene and Entity === Gene
|
@@ -86,18 +84,18 @@ if defined? Entity
|
|
86
84
|
def to_kegg
|
87
85
|
return self if format == "KEGG Gene ID"
|
88
86
|
if Array === self
|
89
|
-
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
|
87
|
+
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
90
88
|
else
|
91
|
-
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
|
89
|
+
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
92
90
|
end
|
93
91
|
end
|
94
92
|
|
95
93
|
def from_kegg
|
96
94
|
return self unless format == "KEGG Gene ID"
|
97
95
|
if Array === self
|
98
|
-
Gene.setup
|
96
|
+
Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
99
97
|
else
|
100
|
-
Gene.setup
|
98
|
+
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
101
99
|
end
|
102
100
|
end
|
103
101
|
|
@@ -112,19 +110,13 @@ if defined? Entity
|
|
112
110
|
self
|
113
111
|
when format == "KEGG Gene ID"
|
114
112
|
ensembl = from_kegg.clean_annotations
|
115
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.
|
113
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
116
114
|
when new_format == "KEGG Gene ID"
|
117
115
|
to_kegg
|
118
116
|
else
|
119
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.
|
117
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
120
118
|
end
|
121
119
|
end
|
122
|
-
persist :to
|
123
|
-
|
124
|
-
#property :to => :array2single do |new_format|
|
125
|
-
# return self if format == new_format
|
126
|
-
# to!(new_format).collect!{|v| Array === v ? v.first : v}
|
127
|
-
#end
|
128
120
|
|
129
121
|
property :kegg_pathways => :array2single do
|
130
122
|
@kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
|
data/lib/rbbt/sources/string.rb
CHANGED
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
5
|
-
prerelease:
|
4
|
+
version: 2.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Miguel Vazquez
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rbbt-util
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -39,7 +36,6 @@ files:
|
|
39
36
|
- lib/rbbt/mutation/chasm.rb
|
40
37
|
- lib/rbbt/mutation/fireDB.rb
|
41
38
|
- lib/rbbt/mutation/mutation_assessor.rb
|
42
|
-
- lib/rbbt/mutation/oncodriveFM.rb
|
43
39
|
- lib/rbbt/mutation/polyphen.rb
|
44
40
|
- lib/rbbt/mutation/sift.rb
|
45
41
|
- lib/rbbt/mutation/snps_and_go.rb
|
@@ -66,7 +62,6 @@ files:
|
|
66
62
|
- share/install/STITCH/Rakefile
|
67
63
|
- share/install/STRING/Rakefile
|
68
64
|
- share/install/lib/rake_helper.rb
|
69
|
-
- share/install/software/OncodriveFM
|
70
65
|
- test/rbbt/sources/test_matador.rb
|
71
66
|
- test/rbbt/sources/test_cancer.rb
|
72
67
|
- test/rbbt/sources/test_stitch.rb
|
@@ -83,27 +78,26 @@ files:
|
|
83
78
|
- test/test_helper.rb
|
84
79
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
85
80
|
licenses: []
|
81
|
+
metadata: {}
|
86
82
|
post_install_message:
|
87
83
|
rdoc_options: []
|
88
84
|
require_paths:
|
89
85
|
- lib
|
90
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
-
none: false
|
92
87
|
requirements:
|
93
88
|
- - ! '>='
|
94
89
|
- !ruby/object:Gem::Version
|
95
90
|
version: '0'
|
96
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
92
|
requirements:
|
99
93
|
- - ! '>='
|
100
94
|
- !ruby/object:Gem::Version
|
101
95
|
version: '0'
|
102
96
|
requirements: []
|
103
97
|
rubyforge_project:
|
104
|
-
rubygems_version:
|
98
|
+
rubygems_version: 2.0.3
|
105
99
|
signing_key:
|
106
|
-
specification_version:
|
100
|
+
specification_version: 4
|
107
101
|
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
108
102
|
test_files:
|
109
103
|
- test/rbbt/sources/test_matador.rb
|
@@ -1,146 +0,0 @@
|
|
1
|
-
require 'rbbt-util'
|
2
|
-
require 'rbbt/util/open'
|
3
|
-
require 'rbbt/tsv'
|
4
|
-
require 'digest/md5'
|
5
|
-
require 'rbbt/sources/organism'
|
6
|
-
|
7
|
-
module OncodriveFM
|
8
|
-
|
9
|
-
Rbbt.claim Rbbt.software.opt.OncodriveFM, :install, Rbbt.share.install.software.OncodriveFM.find
|
10
|
-
|
11
|
-
|
12
|
-
def self.process_cohort(cohort)
|
13
|
-
|
14
|
-
all_mutated_isoforms = cohort.metagenotype.mutated_isoforms.compact.flatten.uniq
|
15
|
-
nonsense = all_mutated_isoforms.select{|mi| mi.consequence == "MISS-SENSE"}
|
16
|
-
|
17
|
-
mutation_assessor = MutEval.job(:mutation_assessor, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
|
18
|
-
sift = MutEval.job(:sift, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
|
19
|
-
polyphen = MutEval.job(:polyphen, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
|
20
|
-
|
21
|
-
mutation_assessor_max = mutation_assessor.slice("Mutation Assessor Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
|
22
|
-
sift_max = sift.slice("SIFT Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
|
23
|
-
polyphen_max = polyphen.slice("Polyphen Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
|
24
|
-
|
25
|
-
mutation_file = []
|
26
|
-
cohort.each do |genotype|
|
27
|
-
sample = genotype.jobname
|
28
|
-
genotype.each do |mutation|
|
29
|
-
genes = mutation.genes
|
30
|
-
next if genes.empty?
|
31
|
-
mut_mis = mutation.mutated_isoforms
|
32
|
-
next if mut_mis.nil? or mut_mis.empty?
|
33
|
-
genes.each do |gene|
|
34
|
-
mis = mut_mis.select{|mi| mi.protein and mi.protein.gene == gene}
|
35
|
-
|
36
|
-
mutation_assessor.values_at(*mis)
|
37
|
-
ma_score = mutation_assessor.values_at(*mis).compact.collect{|v| v["Mutation Assessor Score"]}.first
|
38
|
-
sift_score = sift.values_at(*mis).compact.collect{|v| v["SIFT Score"]}.first
|
39
|
-
polyphen_score = polyphen.values_at(*mis).compact.collect{|v| v["Polyphen Score"]}.first
|
40
|
-
|
41
|
-
ma_score = mutation_assessor_max if mis.select{|mi| mi.truncated}.any?
|
42
|
-
sift_score = sift_max if mis.select{|mi| mi.truncated}.any?
|
43
|
-
polyphen_score = polyphen_max if mis.select{|mi| mi.truncated}.any?
|
44
|
-
|
45
|
-
mutation_file << [gene, sift_score || "NA", polyphen_score || "NA", ma_score || "NA", sample] * "\t"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
TmpFile.with_file(mutation_file * "\n") do |fmuts|
|
51
|
-
TmpFile.with_file do |outdir|
|
52
|
-
FileUtils.mkdir_p outdir unless File.exists? outdir
|
53
|
-
name = "Tumor"
|
54
|
-
|
55
|
-
TmpFile.with_file(config(fmuts, outdir, "[TUMOR]" => name)) do |fconf|
|
56
|
-
puts Open.read(fconf)
|
57
|
-
CMD.cmd("cd #{Rbbt.software.opt.OncodriveFM.bin.find}; ./pipeline_launcher.pl '#{fconf}'").read
|
58
|
-
end
|
59
|
-
|
60
|
-
outfile = File.join(outdir, name + '.fimp')
|
61
|
-
text = Open.read(outfile).gsub(/WARNING.*?\n/m,'').gsub(/\t-\t/,"\t\t").gsub(/\t-$/,"\t")
|
62
|
-
tsv = TSV.open(StringIO.new(text), :type => :list)
|
63
|
-
tsv.key_field = "Ensembl Gene ID"
|
64
|
-
tsv.fields = ["Associated Gene Name", "Sample count", "p-value", "unknown"]
|
65
|
-
|
66
|
-
tsv
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
|
72
|
-
CONFIG_TEMPLATE=<<-EOF
|
73
|
-
###########################################################################################
|
74
|
-
# Input data specific for the tumor under analysis
|
75
|
-
|
76
|
-
#tumor: This name will be used as prefix to name all intermediate and final pipeline files
|
77
|
-
tumor='[TUMOR]'
|
78
|
-
|
79
|
-
#mutfile: File that contains the mutations data of the tumor you want to analyze. Each row corresponds to the mutation of one gene in one sample. Its format should be:
|
80
|
-
#
|
81
|
-
####Ensembl_Gene_ID MA_Zscore CHASM_Zscore Sample_ID
|
82
|
-
mutfile='[MUTFILE]'
|
83
|
-
|
84
|
-
####numFIS: number of functional scores included in the mutations file and used to compute the functional impact bias
|
85
|
-
numFIS='[NUMFIS]'
|
86
|
-
|
87
|
-
###########################################################################################
|
88
|
-
|
89
|
-
###########################################################################################
|
90
|
-
# Common input data (change these only if you have downloaded different info files)
|
91
|
-
|
92
|
-
#genes2gos: File that contains the genes2gos mapping
|
93
|
-
genes2gos='[DATA_DIR]/common/slimgos_distrib/genes2gos'
|
94
|
-
|
95
|
-
#gosdistribs: Directory with the files that contain the distributions of SIFT, PPH2 and MA scores for each slimGOA obtained from 1000genomes.
|
96
|
-
gosdistribs='[DATA_DIR]/common/slimgos_distrib/'
|
97
|
-
|
98
|
-
#genes2symbols: File that contains the genes2symbols mapping obtained from BioMart. Its format should be:
|
99
|
-
#
|
100
|
-
####Ensembl_Gene_ID Gene_Symbol
|
101
|
-
genes2symbols='[DATA_DIR]/common/genes2symbols.txt'
|
102
|
-
|
103
|
-
extrec='NONE'
|
104
|
-
|
105
|
-
#genes2probes: File that contains the genes2probes mapping obtained from BioMart. Its format should be:
|
106
|
-
#
|
107
|
-
####Ensembl_Gene_ID Probe_ID
|
108
|
-
cp='[DATA_DIR]/common/cp.format'
|
109
|
-
|
110
|
-
#genesattr: File that contains genes' longest CDS' lengths obtained from BioMart and genes' basal nsSNVs rates computed from 1000genomes. This are used to assess the statistical significance of genes' mutations recurrence and genes' overmutation rates. Its format should be:
|
111
|
-
#
|
112
|
-
####Ensembl_Gene_ID Longest_CDS_length Basal_nsSNVs_rate
|
113
|
-
genesattr='[DATA_DIR]/common/ensgenes_cds.recurrence'
|
114
|
-
|
115
|
-
#outdir: Directory to write output files
|
116
|
-
outdir='[OUTDIR]'
|
117
|
-
|
118
|
-
#tmpdir: Directory to write intermediate files
|
119
|
-
tmpdir='[TMPDIR]'
|
120
|
-
|
121
|
-
#internal: whether the null distribution will be taken from variants observed in the tumor
|
122
|
-
internal='[INTERNAL]'
|
123
|
-
###########################################################################################
|
124
|
-
EOF
|
125
|
-
|
126
|
-
def self.config(mutfile, outdir, options = {})
|
127
|
-
options = Misc.add_defaults options,
|
128
|
-
"[TUMOR]" => "Tumor",
|
129
|
-
"[MUTFILE]" => mutfile,
|
130
|
-
"[NUMFIS]" => 3,
|
131
|
-
"[DATA_DIR]" => Rbbt.software.opt.OncodriveFM.data.find,
|
132
|
-
"[OUTDIR]" => outdir,
|
133
|
-
"[TMPDIR]" => Rbbt.tmp.OncodriveFM.find,
|
134
|
-
"[INTERNAL]" => 1
|
135
|
-
|
136
|
-
FileUtils.mkdir_p options["[TMPDIR]"] unless File.exists? options["[TMPDIR]"]
|
137
|
-
|
138
|
-
txt = CONFIG_TEMPLATE.dup
|
139
|
-
options.each do |key,value|
|
140
|
-
txt.gsub!(key, value.to_s)
|
141
|
-
end
|
142
|
-
|
143
|
-
txt
|
144
|
-
end
|
145
|
-
|
146
|
-
end
|