rbbt-phgx 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/lib/phgx.rb +16 -0
- data/lib/rbbt/sources/cancer.rb +12 -0
- data/lib/rbbt/sources/kegg.rb +9 -0
- data/lib/rbbt/sources/matador.rb +9 -0
- data/lib/rbbt/sources/nci.rb +9 -0
- data/lib/rbbt/sources/pharmagkb.rb +9 -0
- data/lib/rbbt/sources/stitch.rb +9 -0
- data/lib/rbbt/sources/string.rb +9 -0
- data/share/install/KEGG/Rakefile +109 -0
- data/share/install/Matador/Rakefile +15 -0
- data/share/install/NCI/Rakefile +137 -0
- data/share/install/PharmaGKB/Rakefile +149 -0
- data/share/install/STITCH/Rakefile +30 -0
- data/share/install/STRING/Rakefile +8 -0
- data/share/install/lib/rake_helper.rb +93 -0
- data/test/rbbt/sources/test_cancer.rb +11 -0
- data/test/rbbt/sources/test_matador.rb +11 -0
- data/test/rbbt/sources/test_pharmagkb.rb +11 -0
- data/test/rbbt/sources/test_stitch.rb +11 -0
- data/test/test_helper.rb +9 -0
- metadata +91 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010-2011 Miguel Vázquez García
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/phgx.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/pkg_config'
|
3
|
+
require 'rbbt/util/pkg_data'
|
4
|
+
require 'rbbt/util/pkg_software'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'rbbt/util/tmpfile'
|
7
|
+
require 'rbbt/util/filecache'
|
8
|
+
|
9
|
+
# Package-level module for the pharmaco-genomics data sources.
#
# It picks up its behaviour from the three rbbt-util mix-ins below and, at
# load time, calls `load_cfg` with the key list `%w(datadir)` and a default
# configuration text that points `datadir` at `$HOME/phgx/data`.
# NOTE(review): the exact semantics of `load_cfg` live in rbbt-util's
# PKGConfig and are not visible here.
module PhGx
  extend PKGConfig
  extend PKGData
  extend PKGSoftware

  default_datadir = File.join(ENV['HOME'], 'phgx', 'data')
  load_cfg(%w(datadir), "datadir: #{default_datadir}\n")
end
|
16
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
require 'rbbt/util/data_module'
|
3
|
+
|
4
|
+
# Data module for the "Cancer" data files shipped with PhGx.
#
# Registers two data files with PhGx (`anais_annotations` and
# `anais_interactions`) and then extends DataModule. PKG is assigned before
# the `extend` and that order is kept on purpose.
# NOTE(review): presumably DataModule reads PKG when it is extended; confirm
# in rbbt-util.
module Cancer
  PhGx.add_datafiles :anais_annotations  => ['Cancer', 'Cancer/anais-annotations.txt'],
                     :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']

  PKG = PhGx
  extend DataModule
end

# When run as a script, act on this module. The original called `NCI.all`,
# but NCI is not defined or required by this file (copy-paste from nci.rb).
Cancer.all if __FILE__ == $0
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
                    "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
                    "drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
                    "pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"


# Builds the `genes` table: one row per KEGG entry that carries an
# "Ensembl: ENSG..." cross reference, written as
# "<Ensembl Gene ID>\thsa:<KEGG entry number>".
file :genes => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # each_line instead of String#each, which no longer exists on Ruby >= 1.9.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
    elsif entry && line =~ /Ensembl: (ENSG\d+)/
      # Require a current entry: a nil key would make `"hsa:" + nil` raise
      # when the table is written.
      pairs[entry] = $1
    end
  end

  header = ['#Ensembl Gene ID', 'KEGG Gene ID'] * "\t"
  rows   = pairs.collect { |kegg_id, ensembl_id| [ensembl_id, "hsa:" + kegg_id] * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end
|
25
|
+
|
26
|
+
# Splits a KEGG flat-file dump into field chunks and keeps those matching
# +pattern+.
#
# A chunk starts at a line beginning with a capital letter and runs up to
# (but not including) the next such line, or the end of the text. The
# original regex consumed the first letter of the following field, so every
# other field was silently skipped; a lookahead leaves it in place. The stray
# `s` flag (an encoding option in Ruby, not "dot matches newline") is gone.
#
# As before, the first whitespace run of each kept chunk is collapsed to a
# single space.
def kegg_drug_fields(text, pattern)
  text.scan(/^[A-Z].*?(?=^[A-Z]|\z)/m).
    select { |field| field =~ pattern }.
    collect { |field| field.sub(/\s+/, ' ') }
end

# Builds the `gene_drug` table: for every drug ENTRY, the ids listed in the
# first "[HSA:...]" bracket of its TARGET field are mapped to that drug.
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  kegg_drug_fields(Open.read(t.prerequisites.first), /^ENTRY|TARGET/).each do |field|
    if field =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if field =~ /TARGET.*?\[HSA:(.*?)\]/
      $1.split(/\s/).each do |gene|
        pairs[gene] ||= []
        pairs[gene] << drug
      end
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows   = pairs.collect { |gene, drugs| ["hsa:" + gene, drugs * "|"] * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end

# Builds the `drugs` table: drug id, name(s) taken from the first line of the
# NAME field, and the PubChem id found in the DBLINKS field.
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  kegg_drug_fields(Open.read(t.prerequisites.first), /^ENTRY|NAME|DBLINKS/).each do |field|
    if field =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if field =~ /^NAME(.*)/
      $1.split(/;/).each do |name|
        info[drug] ||= [[], []]
        info[drug][0] << name.chomp.strip
      end
    end

    if field =~ /^DBLINKS/
      # Search the whole chunk: the original only looked at the first line of
      # DBLINKS (`.` does not cross newlines), so a PubChem link on a
      # continuation line was never found.
      pubchem = field[/PubChem: (\d+)/, 1]
      next unless pubchem
      info[drug] ||= [[], []]
      info[drug][1] << pubchem
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  rows   = info.collect { |id, fields| ([id] + fields.collect { |v| v * "|" }) * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end
|
77
|
+
|
78
|
+
# Builds the `pathways` table from the KEGG pathway dump: for each ENTRY the
# last NAME and DESCRIPTION lines seen are kept, and one row is written per
# pathway that has a name.
file :pathways => 'source/pathways' do |t|
  names   = {}
  descs   = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # The three checks are independent, exactly as in the sequential `if`s.
    current        = $1.strip if line =~ /ENTRY\s+(\w+)/
    names[current] = $1.strip if line =~ /NAME (.*)/
    descs[current] = $1.strip if line =~ /DESCRIPTION (.*)/
  end

  header = ['#KEGG Pathway ID', 'Name', 'Description'] * "\t"
  rows   = names.keys.collect { |id| [id, names[id], descs[id]] * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end
|
98
|
+
|
99
|
+
# gene -> pathways table built from hsa_gene_map.tab. The custom `data` block
# prefixes the gene with "hsa:" and every pathway name with "hsa", joining
# the pathways with "|".
process_tsv(:gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ') do
  headers ['KEGG Gene ID', 'KEGG Pathways']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect { |name| "hsa" + name }
    "hsa:#{ gene }\t#{pathway_ids * "|"}"
  end
end

# File tasks (not declared through process_tsv) that `rake default` must build.
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
|
108
|
+
|
109
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"


# protein -> drug table from the Matador dump. The `:fix` lambda strips the
# first "9606." prefix from each line. The dot is escaped so it only matches
# a literal "9606." (the original `/9606./` also matched "9606" followed by
# any character), consistent with the STITCH and STRING Rakefiles.
process_tsv :protein_drug, 'matador',
  :native => 3,
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :extra => [1,0,7,8,9,10,11,12],
  :header_hash => "",
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Matador ID', 'Score', 'Annotation', 'Mesh_Score', 'Mesh_Annotation', 'Matador_Score', 'Matador_Annotation']
end
|
15
|
+
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "cancer_gene" => "https://gforge.nci.nih.gov/frs/download.php/6819/NCI_CancerIndex_allphases_disease.zip",
                    "gene_drug" => "https://gforge.nci.nih.gov/frs/download.php/6821/NCI_CancerIndex_allphases_compound.zip"


# Formats one output row for +gene+.
#
# The four parallel lists are zipped and grouped by term: each distinct term
# keeps the last concept code seen for it and accumulates its statements and
# PubMed ids. Terms are joined with "|"; the statements / ids belonging to
# one term are joined with ";;".
def nci_index_row(gene, terms, concepts, statements, pmids)
  grouped = {}
  TSV.zip_fields([terms, concepts, statements, pmids]).each do |fields|
    term, concept, statement, pmid = fields
    grouped[term] ||= {:statements => [], :pmids => []}
    grouped[term][:concept] = concept
    grouped[term][:statements] << statement
    grouped[term][:pmids] << pmid
  end

  names   = grouped.keys
  records = grouped.values_at(*names)
  [
    gene,
    names * "|",
    records.collect { |r| r[:concept] } * "|",
    records.collect { |r| r[:statements] * ";;" } * "|",
    records.collect { |r| r[:pmids] * ";;" } * "|"
  ] * "\t"
end

# Scans an NCI Cancer Index XML dump line by line and returns a hash of
# gene symbol => formatted row.
#
# A <HUGOGeneSymbol> line starts a new gene. <term_tag>, <concept_tag>,
# <Statement> and <PubMedID> lines seen after it are collected for that gene.
# Lines that appear before the first gene are collected and then discarded.
def nci_index_rows(text, term_tag, concept_tag)
  term_re    = /<#{term_tag}>(.*)<\/#{term_tag}>/
  concept_re = /<#{concept_tag}>(.*)<\/#{concept_tag}>/

  rows = {}
  gene = nil
  terms, concepts, statements, pmids = [], [], [], []

  text.split(/\n/).each do |line|
    if line =~ /<HUGOGeneSymbol>(.*)<\/HUGOGeneSymbol>/
      symbol = $1
      rows[gene] = nci_index_row(gene, terms, concepts, statements, pmids) unless gene.nil?
      gene = symbol
      terms, concepts, statements, pmids = [], [], [], []
    end

    terms      << $1 if line =~ term_re
    concepts   << $1 if line =~ concept_re
    statements << $1 if line =~ /<Statement>(.*)<\/Statement>/
    pmids      << $1 if line =~ /<PubMedID>(.*)<\/PubMedID>/
  end

  # Flush the final gene: the original only stored a gene when the *next*
  # <HUGOGeneSymbol> was met, so the last gene of the file was always lost.
  rows[gene] = nci_index_row(gene, terms, concepts, statements, pmids) unless gene.nil?
  rows
end

# Writes the "#"-prefixed header line followed by one line per row.
def write_nci_index(path, header, rows)
  File.open(path, 'w') do |f|
    f.puts "#" + header * "\t"
    rows.each { |_gene, row| f.puts row }
  end
end

# gene -> drugs table from the compound index.
file :gene_drug => 'source/gene_drug' do |t|
  rows = nci_index_rows(Open.read(t.prerequisites.first), 'DrugTerm', 'NCIDrugConceptCode')
  write_nci_index(t.name, ['Associated Gene Name', 'Drugs', 'Drug Concepts', 'Statements', 'PMIDS'], rows)
end

# gene -> diseases table from the disease index.
file :gene_cancer => 'source/cancer_gene' do |t|
  rows = nci_index_rows(Open.read(t.prerequisites.first), 'MatchedDiseaseTerm', 'NCIDiseaseConceptCode')
  write_nci_index(t.name, ['Associated Gene Name', 'Diseases', 'Disease Concepts', 'Statements', 'PMIDS'], rows)
end

add_to_defaults [:gene_cancer, :gene_drug]
|
137
|
+
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
pharmgkb_sources = {
  "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
}
define_source_tasks pharmgkb_sources

# Line clean-up shared by the three tables below: `","` separators become
# "|", remaining double quotes are dropped, and a comma sitting right before
# a tab or the end of the line is removed.
unquote_lists = proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')}

process_tsv(:diseases, 'diseases', :header_hash => "", :fix => unquote_lists) do
  headers ['PhGKB Disease ID']
end

process_tsv(:genes, 'genes', :header_hash => "", :fix => unquote_lists) do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

process_tsv(:drugs, 'drugs',
            :header_hash => "",
            :extra => ['Name', 'DrugBank Id'],
            :fix => unquote_lists) do
  headers ['PhGKB Drug ID', 'Name', 'DrugBank ID']
end
|
29
|
+
|
30
|
+
|
31
|
+
# Strips the entity-type prefixes from PharmGKB ids ("Gene:PA1" -> "PA1").
# The original pattern was /Gene:|Drug:|Disease/ - without the colon on
# "Disease" - which turned "Disease:PA1" into ":PA1" and also removed the
# word "Disease" from any free text on the line.
strip_entity_prefix = proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')}

# gene -> drug relationships (lines mentioning both a Gene: and a Drug:).
process_tsv :gene_drug, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Drug:/},
  :native => 'Entity1_id',
  :extra => ['Entity2_id','Relationship'],
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'Drug']
end

# gene -> disease relationships (lines mentioning both a Gene: and a Disease:).
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Disease:/},
  :native => 1,
  :extra => 3,
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end

# Variant annotations keyed by column 1.
process_tsv :variants, 'variants',
  :native => 1,
  :extra => [3,7,8,9,10,4,5],
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Evidence']
end
|
62
|
+
|
63
|
+
# Writes the pathway catalogue: every source line shaped like
# "PA<digits>: <name> - (<source>)" becomes an id / name / source row.
# All other lines are ignored.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Name','Source'] * "\t"

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      found = /(PA\d+): (.*) - \((.*)\)/.match(line)
      out.puts found.captures * "\t" if found
    end
  end
end
|
72
|
+
|
73
|
+
# Parses the PharmGKB pathways dump and returns
# { pathway => {:name => ..., :members => [...] or nil} }.
#
# A line matching /(P.*):(.*)/ opens a new pathway. Any other line is split
# on tabs into type / code / name, and the name is recorded under the current
# pathway when the type equals +entity_type+.
#
# A member row that appears before any pathway header is skipped; the
# original raised NoMethodError on nil in that case.
def pharmgkb_pathway_members(text, entity_type)
  pathways = {}
  last_pathway = nil

  text.split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
    else
      type, _code, name = line.split(/\t/)
      next unless type == entity_type
      next if last_pathway.nil?
      pathways[last_pathway][:members] ||= []
      pathways[last_pathway][:members] << name
    end
  end

  pathways
end

# Writes one row per pathway that has at least one member:
# pathway, name, members joined with "|".
def write_pharmgkb_pathway_members(path, header, pathways)
  File.open(path, 'w') do |f|
    f.puts "#" + header * "\t"
    pathways.each do |pathway, info|
      next if info[:members].nil?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:members] * "|"}"
    end
  end
end

file :gene_pathway => 'source/pathways' do |t|
  pathways = pharmgkb_pathway_members(Open.read(t.prerequisites.first), 'Gene')
  write_pharmgkb_pathway_members(t.name, ['Pathway ID', 'Name', 'Associated Gene Name'], pathways)
end

file :drug_pathway => 'source/pathways' do |t|
  pathways = pharmgkb_pathway_members(Open.read(t.prerequisites.first), 'Drug')
  write_pharmgkb_pathway_members(t.name, %w(ID Name Drugs), pathways)
end

file :disease_pathway => 'source/pathways' do |t|
  pathways = pharmgkb_pathway_members(Open.read(t.prerequisites.first), 'Disease')
  write_pharmgkb_pathway_members(t.name, %w(ID Name Diseases), pathways)
end

add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
                    "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"

# protein -> chemical links restricted to lines containing "9606.".
# The grep pattern is single-quoted so the backslash survives: in the
# original double-quoted "9606\." Ruby drops the backslash, leaving "9606."
# with the dot acting as a wildcard. This matches the STRING Rakefile.
process_tsv :protein_chemical, 'protein_chemicals',
  :native => 1,
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end

# The array is handed to process_tsv as the :grep option now and filled in
# place (via #replace) when the task actually runs, once the protein_chemical
# table exists and its keys can be read.
$grep_re = []
process_tsv :chemicals, 'chemicals',
  :grep => $grep_re,
  :native => 0 do

  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  chemicals = TSV.new('protein_chemical', :native => 1, :other => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace chemicals

  # NOTE(review): these headers are identical to the protein_chemical ones;
  # confirm they are really meant for the chemical aliases table.
  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
|
30
|
+
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks("protein_protein" => "http://string-db.org:8080/newstring_download/protein.links.v8.3.txt.gz")

# Removes every "9606." prefix from a line.
drop_taxon_prefix = lambda{|l| l.gsub(/9606\./,'')}

# protein -> protein links, restricted to lines matching '9606\.ENSP'.
process_tsv(:protein_protein, 'protein_protein',
            :grep => '9606\.ENSP',
            :fix => drop_taxon_prefix) do
  headers ['Ensembl Protein ID', 'Ensembl Protein ID', 'Score']
end
|
8
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '../../../lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
|
4
|
+
require 'rbbt/util/tsv'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'rbbt/util/log'
|
7
|
+
|
8
|
+
# Directory (relative to the Rakefile) where downloaded sources are stored.
SOURCE_DIR = 'source'

# Declares one Rake file task per entry of +sources+.
#
# sources - Hash of file name => URL. Each task is named
#           "<SOURCE_DIR>/<name>" and, when run, downloads the URL through
#           Open.open and writes it to that path.
def define_source_tasks(sources)
  sources.each do |name, url|
    file File.join(SOURCE_DIR, name) do |t|
      # mkdir_p is a no-op when the directory already exists; it replaces the
      # `File.exists?` check (that method was removed in Ruby 3.2).
      FileUtils.mkdir_p SOURCE_DIR
      Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
      Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
    end
  end
end
|
18
|
+
|
19
|
+
# Shared state for the small Rakefile DSL below. The values are set from
# inside a `process_tsv` block and read back by the task that process_tsv
# defines.
$__headers   = nil
$__data      = nil
$__tsv_tasks = []

# Records the header names to write for the table being processed.
# Returns +values+.
def headers(values)
  $__headers = values
end

# Records the block used to turn each (key, values) pair into an output line.
# Returns the block as a Proc.
def data(&block)
  $__data = block
end

# Returns the list of task names registered through process_tsv.
def tsv_tasks
  $__tsv_tasks
end
|
33
|
+
|
34
|
+
# Extra task names (beyond those registered by process_tsv) that the
# `default` and `clean` tasks operate on.
$__files = []

# Adds +list+ (a task name or an array of task names) to the default targets.
#
# The original assigned `$__files = list`, so a second call silently dropped
# the names from the first one and the global aliased the caller's array.
# Names are now appended to a fresh array, skipping duplicates.
#
# Returns the accumulated list.
def add_to_defaults(list)
  $__files = ($__files + Array(list)).uniq
end
|
38
|
+
|
39
|
+
# Declares a Rake file task that converts a downloaded source into a TSV file.
#
# file    - name of the task / output file; also registered in $__tsv_tasks.
# source  - name of the prerequisite under SOURCE_DIR.
# options - passed straight to TSV.new when the source is loaded.
# block   - optional; run when the task executes, before the source is read.
#           It normally calls `headers` and/or `data`.
#
# When the task runs:
# * The header line is the loaded table's key field plus fields, with each
#   position overridden by the value given to `headers` when there is one.
#   If the table reports no fields, the `headers` value is used as is.
# * Each row is either key + values, or whatever the `data` block returns.
#   Array rows are tab-joined, with nested arrays after the first element
#   joined by "|"; any other value is written unchanged.
def process_tsv(file, source, options = {}, &block)
  $__tsv_tasks << file

  file(file => File.join(SOURCE_DIR, source)) do |t|
    # Start from a clean slate so `headers` / `data` set by a previously run
    # task cannot leak into this one.
    $__headers = nil
    $__data = nil
    block.call if block

    d = TSV.new(t.prerequisites.first, options)

    if d.fields
      data_fields = d.fields.dup.unshift d.key_field
      $__headers = if $__headers.nil?
                     data_fields
                   else
                     data_fields.zip($__headers).collect { |pair| pair.compact.last }
                   end
    end

    File.open(t.name.to_s, 'w') do |f|
      f.puts "#" + $__headers * "\t" if $__headers

      d.each do |key, values|
        # Build a new array rather than unshifting into the table's own row.
        line = $__data.nil? ? [key] + values : $__data.call(key, values)

        if Array === line
          first, *rest = line
          fields = rest.collect { |elem| Array === elem ? elem * "|" : elem }
          f.puts fields.unshift(first) * "\t"
        else
          f.puts line
        end
      end
    end
  end
end
|
84
|
+
|
85
|
+
# Builds every table registered through process_tsv or add_to_defaults.
task :default do
  ($__tsv_tasks + $__files).each { |file| Rake::Task[file].invoke }
end

task :all => :default

# Deletes the generated tables that are present. Uses File.exist? because
# File.exists? was removed in Ruby 3.2.
task :clean do
  ($__tsv_tasks + $__files).each do |file|
    path = file.to_s
    FileUtils.rm path if File.exist?(path)
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/cancer'
|
5
|
+
|
6
|
+
# Checks the Cancer annotation table.
class TestCancer < Test::Unit::TestCase
  # ENSG00000087460 must list 'Adrenocortical' under 'Tumor Type'.
  def test_anais_annotations
    annotations = TSV.new(Cancer.anais_annotations)
    tumor_types = annotations['ENSG00000087460']['Tumor Type']
    assert tumor_types.include?('Adrenocortical')
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/matador'
|
5
|
+
|
6
|
+
# Checks the Matador protein -> drug table.
class TestMatador < Test::Unit::TestCase
  # The first chemical listed for ENSP00000343023 must be procainamide.
  def test_matador
    protein_drug = TSV.new(Matador.protein_drug)
    chemicals = protein_drug['ENSP00000343023']['Chemical']
    assert_equal 'procainamide', chemicals.first
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/pharmagkb'
|
5
|
+
|
6
|
+
# Checks the PharmaGKB variants table.
class TestPhGKB < Test::Unit::TestCase
  # Variant rs25487 must be associated with gene XRCC1.
  def test_phgkb
    variants = TSV.new(PharmaGKB.variants)
    genes = variants['rs25487']['Associated Gene Name']
    assert genes.include?('XRCC1')
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/stitch'
|
5
|
+
|
6
|
+
# Checks the STITCH chemicals table.
class TestSTITCH < Test::Unit::TestCase
  # The table must contain at least one truthy key.
  def test_stitch
    chemical_ids = TSV.new(STITCH.chemicals).keys
    assert chemical_ids.any?
  end
end
|
11
|
+
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbbt-phgx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Miguel Vazquez
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-10 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Pharmaco-genomics related data sources
|
23
|
+
email: miguel.vazquez@fdi.ucm.es
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files:
|
29
|
+
- LICENSE
|
30
|
+
files:
|
31
|
+
- LICENSE
|
32
|
+
- lib/phgx.rb
|
33
|
+
- lib/rbbt/sources/cancer.rb
|
34
|
+
- lib/rbbt/sources/kegg.rb
|
35
|
+
- lib/rbbt/sources/matador.rb
|
36
|
+
- lib/rbbt/sources/nci.rb
|
37
|
+
- lib/rbbt/sources/pharmagkb.rb
|
38
|
+
- lib/rbbt/sources/stitch.rb
|
39
|
+
- lib/rbbt/sources/string.rb
|
40
|
+
- share/install/KEGG/Rakefile
|
41
|
+
- share/install/Matador/Rakefile
|
42
|
+
- share/install/NCI/Rakefile
|
43
|
+
- share/install/PharmaGKB/Rakefile
|
44
|
+
- share/install/STITCH/Rakefile
|
45
|
+
- share/install/STRING/Rakefile
|
46
|
+
- share/install/lib/rake_helper.rb
|
47
|
+
- test/rbbt/sources/test_cancer.rb
|
48
|
+
- test/rbbt/sources/test_matador.rb
|
49
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
50
|
+
- test/rbbt/sources/test_stitch.rb
|
51
|
+
- test/test_helper.rb
|
52
|
+
has_rdoc: true
|
53
|
+
homepage: http://github.com/mikisvaz/rbbt-phgx
|
54
|
+
licenses: []
|
55
|
+
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.3.7
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
86
|
+
test_files:
|
87
|
+
- test/rbbt/sources/test_cancer.rb
|
88
|
+
- test/rbbt/sources/test_matador.rb
|
89
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
90
|
+
- test/rbbt/sources/test_stitch.rb
|
91
|
+
- test/test_helper.rb
|