rbbt-phgx 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTQ0NDhjZjM0ODlmNmY4MTJjNzk5NmM4Yjg0ZjExN2Y4NGJkMmUwYQ==
5
+ data.tar.gz: !binary |-
6
+ MjY5NTQyNWE0Njk4OGZmNjUwYWI2YzM5MzBkODdhMzcyMDNjZDM5MQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ YzQ3NDlmNzA2OWE5MmE3NGUzYTI5MTIyNzViYzQ0NGQxZDViZjI3ZTE4YmQ1
10
+ ODFlZTk3OWU1NWFhN2U0NWE5YzI3MGE0YTM1ZjFkZGIzYzVkNjI3MDBjMzhl
11
+ MjA4MmNkNjczN2Q0NzdhZjY1YWZiOWM2OTFkNTVjMDU2M2MwZGE=
12
+ data.tar.gz: !binary |-
13
+ OGY4ZjY0YjViYjc2MGU1MTM0MGFmODg0MWQ1YjQwOTJjOTk3MmIxMWUyNjE0
14
+ NTNkZTNlNTI2YzY0MGFiOWYyMzFhMWVhYjY2MmNhMTJiYzI2NWQ2YjE1YjIx
15
+ Yzk3NmM1MmFkMDk2MGNkNmVmYzZkMGExZjVkZDVmZmVjZDQ5ZTY=
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2011 Miguel Vázquez García
1
+ Copyright (c) 2010-2013 Miguel Vázquez García
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -65,7 +65,6 @@ if defined? Entity
65
65
  name = KEGG.id2name(self)
66
66
  name.sub(/ - Homo.*/,'') unless name.nil?
67
67
  end
68
- persist :name
69
68
 
70
69
  property :description => :single2array do
71
70
  KEGG.description(self)
@@ -76,7 +75,6 @@ if defined? Entity
76
75
  KEGG.index2genes.values_at(*self).
77
76
  each{|gene| gene.organism = organism if gene.respond_to? :organism }
78
77
  end
79
- persist :genes
80
78
  end
81
79
 
82
80
  if defined? Gene and Entity === Gene
@@ -86,18 +84,18 @@ if defined? Entity
86
84
  def to_kegg
87
85
  return self if format == "KEGG Gene ID"
88
86
  if Array === self
89
- Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
87
+ Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
90
88
  else
91
- Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
89
+ Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
92
90
  end
93
91
  end
94
92
 
95
93
  def from_kegg
96
94
  return self unless format == "KEGG Gene ID"
97
95
  if Array === self
98
- Gene.setup KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism
96
+ Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
99
97
  else
100
- Gene.setup KEGG.index2ens[self], "Ensembl Gene ID", organism
98
+ Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
101
99
  end
102
100
  end
103
101
 
@@ -112,19 +110,13 @@ if defined? Entity
112
110
  self
113
111
  when format == "KEGG Gene ID"
114
112
  ensembl = from_kegg.clean_annotations
115
- Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.values_at(*ensembl), new_format, organism)
113
+ Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
116
114
  when new_format == "KEGG Gene ID"
117
115
  to_kegg
118
116
  else
119
- Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self), new_format, organism)
117
+ Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
120
118
  end
121
119
  end
122
- persist :to
123
-
124
- #property :to => :array2single do |new_format|
125
- # return self if format == new_format
126
- # to!(new_format).collect!{|v| Array === v ? v.first : v}
127
- #end
128
120
 
129
121
  property :kegg_pathways => :array2single do
130
122
  @kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
@@ -22,7 +22,6 @@ if defined? Entity and defined? Gene and Entity === Gene
22
22
 
23
23
  all
24
24
  end
25
- #persist :_ary_string_interactors
26
25
  end
27
26
  end
28
27
 
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-phgx
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
5
- prerelease:
4
+ version: 2.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Miguel Vazquez
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-09 00:00:00.000000000 Z
11
+ date: 2013-10-21 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rbbt-util
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -39,7 +36,6 @@ files:
39
36
  - lib/rbbt/mutation/chasm.rb
40
37
  - lib/rbbt/mutation/fireDB.rb
41
38
  - lib/rbbt/mutation/mutation_assessor.rb
42
- - lib/rbbt/mutation/oncodriveFM.rb
43
39
  - lib/rbbt/mutation/polyphen.rb
44
40
  - lib/rbbt/mutation/sift.rb
45
41
  - lib/rbbt/mutation/snps_and_go.rb
@@ -66,7 +62,6 @@ files:
66
62
  - share/install/STITCH/Rakefile
67
63
  - share/install/STRING/Rakefile
68
64
  - share/install/lib/rake_helper.rb
69
- - share/install/software/OncodriveFM
70
65
  - test/rbbt/sources/test_matador.rb
71
66
  - test/rbbt/sources/test_cancer.rb
72
67
  - test/rbbt/sources/test_stitch.rb
@@ -83,27 +78,26 @@ files:
83
78
  - test/test_helper.rb
84
79
  homepage: http://github.com/mikisvaz/rbbt-phgx
85
80
  licenses: []
81
+ metadata: {}
86
82
  post_install_message:
87
83
  rdoc_options: []
88
84
  require_paths:
89
85
  - lib
90
86
  required_ruby_version: !ruby/object:Gem::Requirement
91
- none: false
92
87
  requirements:
93
88
  - - ! '>='
94
89
  - !ruby/object:Gem::Version
95
90
  version: '0'
96
91
  required_rubygems_version: !ruby/object:Gem::Requirement
97
- none: false
98
92
  requirements:
99
93
  - - ! '>='
100
94
  - !ruby/object:Gem::Version
101
95
  version: '0'
102
96
  requirements: []
103
97
  rubyforge_project:
104
- rubygems_version: 1.8.24
98
+ rubygems_version: 2.0.3
105
99
  signing_key:
106
- specification_version: 3
100
+ specification_version: 4
107
101
  summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
108
102
  test_files:
109
103
  - test/rbbt/sources/test_matador.rb
@@ -1,146 +0,0 @@
1
- require 'rbbt-util'
2
- require 'rbbt/util/open'
3
- require 'rbbt/tsv'
4
- require 'digest/md5'
5
- require 'rbbt/sources/organism'
6
-
7
- module OncodriveFM
8
-
9
- Rbbt.claim Rbbt.software.opt.OncodriveFM, :install, Rbbt.share.install.software.OncodriveFM.find
10
-
11
-
12
- def self.process_cohort(cohort)
13
-
14
- all_mutated_isoforms = cohort.metagenotype.mutated_isoforms.compact.flatten.uniq
15
- nonsense = all_mutated_isoforms.select{|mi| mi.consequence == "MISS-SENSE"}
16
-
17
- mutation_assessor = MutEval.job(:mutation_assessor, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
18
- sift = MutEval.job(:sift, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
19
- polyphen = MutEval.job(:polyphen, "OncodriveFM", :mutations => all_mutated_isoforms.subset(nonsense)).run
20
-
21
- mutation_assessor_max = mutation_assessor.slice("Mutation Assessor Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
22
- sift_max = sift.slice("SIFT Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
23
- polyphen_max = polyphen.slice("Polyphen Score").values.flatten.collect{|v| (v.nil? or v.empty?) ? nil : v.to_f}.compact.max
24
-
25
- mutation_file = []
26
- cohort.each do |genotype|
27
- sample = genotype.jobname
28
- genotype.each do |mutation|
29
- genes = mutation.genes
30
- next if genes.empty?
31
- mut_mis = mutation.mutated_isoforms
32
- next if mut_mis.nil? or mut_mis.empty?
33
- genes.each do |gene|
34
- mis = mut_mis.select{|mi| mi.protein and mi.protein.gene == gene}
35
-
36
- mutation_assessor.values_at(*mis)
37
- ma_score = mutation_assessor.values_at(*mis).compact.collect{|v| v["Mutation Assessor Score"]}.first
38
- sift_score = sift.values_at(*mis).compact.collect{|v| v["SIFT Score"]}.first
39
- polyphen_score = polyphen.values_at(*mis).compact.collect{|v| v["Polyphen Score"]}.first
40
-
41
- ma_score = mutation_assessor_max if mis.select{|mi| mi.truncated}.any?
42
- sift_score = sift_max if mis.select{|mi| mi.truncated}.any?
43
- polyphen_score = polyphen_max if mis.select{|mi| mi.truncated}.any?
44
-
45
- mutation_file << [gene, sift_score || "NA", polyphen_score || "NA", ma_score || "NA", sample] * "\t"
46
- end
47
- end
48
- end
49
-
50
- TmpFile.with_file(mutation_file * "\n") do |fmuts|
51
- TmpFile.with_file do |outdir|
52
- FileUtils.mkdir_p outdir unless File.exists? outdir
53
- name = "Tumor"
54
-
55
- TmpFile.with_file(config(fmuts, outdir, "[TUMOR]" => name)) do |fconf|
56
- puts Open.read(fconf)
57
- CMD.cmd("cd #{Rbbt.software.opt.OncodriveFM.bin.find}; ./pipeline_launcher.pl '#{fconf}'").read
58
- end
59
-
60
- outfile = File.join(outdir, name + '.fimp')
61
- text = Open.read(outfile).gsub(/WARNING.*?\n/m,'').gsub(/\t-\t/,"\t\t").gsub(/\t-$/,"\t")
62
- tsv = TSV.open(StringIO.new(text), :type => :list)
63
- tsv.key_field = "Ensembl Gene ID"
64
- tsv.fields = ["Associated Gene Name", "Sample count", "p-value", "unknown"]
65
-
66
- tsv
67
- end
68
- end
69
-
70
- end
71
-
72
- CONFIG_TEMPLATE=<<-EOF
73
- ###########################################################################################
74
- # Input data specific for the tumor under analysis
75
-
76
- #tumor: This name will be used as prefix to name all intermediate and final pipeline files
77
- tumor='[TUMOR]'
78
-
79
- #mutfile: File that contains the mutations data of the tumor you want to analyze. Each row corresponds to the mutation of one gene in one sample. Its format should be:
80
- #
81
- ####Ensembl_Gene_ID MA_Zscore CHASM_Zscore Sample_ID
82
- mutfile='[MUTFILE]'
83
-
84
- ####numFIS: number of functional scores included in the mutations file and used to compute the functional impact bias
85
- numFIS='[NUMFIS]'
86
-
87
- ###########################################################################################
88
-
89
- ###########################################################################################
90
- # Common input data (change these only if you have downloaded different info files)
91
-
92
- #genes2gos: File that contains the genes2gos mapping
93
- genes2gos='[DATA_DIR]/common/slimgos_distrib/genes2gos'
94
-
95
- #gosdistribs: Directory with the files that contain the distributions of SIFT, PPH2 and MA scores for each slimGOA obtained from 1000genomes.
96
- gosdistribs='[DATA_DIR]/common/slimgos_distrib/'
97
-
98
- #genes2symbols: File that contains the genes2symbols mapping obtained from BioMart. Its format should be:
99
- #
100
- ####Ensembl_Gene_ID Gene_Symbol
101
- genes2symbols='[DATA_DIR]/common/genes2symbols.txt'
102
-
103
- extrec='NONE'
104
-
105
- #genes2probes: File that contains the genes2probes mapping obtained from BioMart. Its format should be:
106
- #
107
- ####Ensembl_Gene_ID Probe_ID
108
- cp='[DATA_DIR]/common/cp.format'
109
-
110
- #genesattr: File that contains genes' longest CDS' lengths obtained from BioMart and genes' basal nsSNVs rates computed from 1000genomes. This are used to assess the statistical significance of genes' mutations recurrence and genes' overmutation rates. Its format should be:
111
- #
112
- ####Ensembl_Gene_ID Longest_CDS_length Basal_nsSNVs_rate
113
- genesattr='[DATA_DIR]/common/ensgenes_cds.recurrence'
114
-
115
- #outdir: Directory to write output files
116
- outdir='[OUTDIR]'
117
-
118
- #tmpdir: Directory to write intermediate files
119
- tmpdir='[TMPDIR]'
120
-
121
- #internal: whether the null distribution will be taken from variants observed in the tumor
122
- internal='[INTERNAL]'
123
- ###########################################################################################
124
- EOF
125
-
126
- def self.config(mutfile, outdir, options = {})
127
- options = Misc.add_defaults options,
128
- "[TUMOR]" => "Tumor",
129
- "[MUTFILE]" => mutfile,
130
- "[NUMFIS]" => 3,
131
- "[DATA_DIR]" => Rbbt.software.opt.OncodriveFM.data.find,
132
- "[OUTDIR]" => outdir,
133
- "[TMPDIR]" => Rbbt.tmp.OncodriveFM.find,
134
- "[INTERNAL]" => 1
135
-
136
- FileUtils.mkdir_p options["[TMPDIR]"] unless File.exists? options["[TMPDIR]"]
137
-
138
- txt = CONFIG_TEMPLATE.dup
139
- options.each do |key,value|
140
- txt.gsub!(key, value.to_s)
141
- end
142
-
143
- txt
144
- end
145
-
146
- end
@@ -1,13 +0,0 @@
1
- #!/bin/bash
2
-
3
- INSTALL_HELPER_FILE="$1"
4
- RBBT_SOFTWARE_DIR="$2"
5
-
6
- source "$INSTALL_HELPER_FILE"
7
-
8
- name='OncodriveFM'
9
- url="http://bg.upf.edu/group/projects/oncodrivefm-1.1.0.tar.gz"
10
-
11
-
12
- install_src "$name" "$url"
13
-