rbbt-phgx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/lib/phgx.rb +16 -0
- data/lib/rbbt/sources/cancer.rb +12 -0
- data/lib/rbbt/sources/kegg.rb +9 -0
- data/lib/rbbt/sources/matador.rb +9 -0
- data/lib/rbbt/sources/nci.rb +9 -0
- data/lib/rbbt/sources/pharmagkb.rb +9 -0
- data/lib/rbbt/sources/stitch.rb +9 -0
- data/lib/rbbt/sources/string.rb +9 -0
- data/share/install/KEGG/Rakefile +109 -0
- data/share/install/Matador/Rakefile +15 -0
- data/share/install/NCI/Rakefile +137 -0
- data/share/install/PharmaGKB/Rakefile +149 -0
- data/share/install/STITCH/Rakefile +30 -0
- data/share/install/STRING/Rakefile +8 -0
- data/share/install/lib/rake_helper.rb +93 -0
- data/test/rbbt/sources/test_cancer.rb +11 -0
- data/test/rbbt/sources/test_matador.rb +11 -0
- data/test/rbbt/sources/test_pharmagkb.rb +11 -0
- data/test/rbbt/sources/test_stitch.rb +11 -0
- data/test/test_helper.rb +9 -0
- metadata +91 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010-2011 Miguel Vázquez García
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/phgx.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/pkg_config'
|
3
|
+
require 'rbbt/util/pkg_data'
|
4
|
+
require 'rbbt/util/pkg_software'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'rbbt/util/tmpfile'
|
7
|
+
require 'rbbt/util/filecache'
|
8
|
+
|
9
|
+
# Root package module for the pharmaco-genomics data sources.
#
# It is extended with the rbbt package helpers (configuration, data files and
# software) and loads its configuration, supplying a default `datadir` entry
# located under the user's home directory.
module PhGx
  extend PKGConfig
  extend PKGData
  extend PKGSoftware

  # Default data directory: $HOME/phgx/data
  default_datadir = File.join(ENV['HOME'], 'phgx', 'data')

  load_cfg(%w(datadir), "datadir: #{default_datadir}\n")
end
|
16
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
require 'rbbt/util/data_module'
|
3
|
+
|
4
|
+
# Data-source module for the "Cancer" annotation tables.
#
# Declares two data files on the PhGx package (annotations and interactions)
# and extends itself with DataModule, using PhGx as its owning package (PKG).
module Cancer
  PhGx.add_datafiles :anais_annotations  => ['Cancer', 'Cancer/anais-annotations.txt'],
                     :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']

  PKG = PhGx
  extend DataModule
end

# When this file is executed directly, trigger the module's `all` entry point.
# The original line called `NCI.all`, but NCI is never loaded by this file, so
# running it raised NameError; this module's own name is what was intended.
# NOTE(review): `all` is presumably provided through DataModule - confirm.
if __FILE__ == $0 then Cancer.all end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw KEGG downloads: local file name (under source/) => remote URL.
kegg_sources = {
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
}

define_source_tasks kegg_sources
|
7
|
+
|
8
|
+
|
9
|
+
# Builds the `genes` table mapping Ensembl gene ids to KEGG gene ids.
#
# The source is scanned line by line: an `ENTRY <digits>` line opens a new
# KEGG entry and an `Ensembl: ENSG...` cross reference found afterwards is
# recorded for that entry (the last one seen per entry wins).
file :genes => 'source/h.sapiens' do |t|
  ensembl_by_entry = {}
  entry = nil

  # String#each was removed in Ruby 1.9; each_line exists in 1.8.7 and later.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    # A cross reference seen before any ENTRY has no owner: skip it instead of
    # storing it under nil (which made the write below fail on "hsa:" + nil).
    ensembl_by_entry[entry] = $1 if entry && line =~ /Ensembl: (ENSG\d+)/
  end

  header = ['#Ensembl Gene ID','KEGG Gene ID'] * "\t"
  rows   = ensembl_by_entry.collect{|kegg, ens| [ens, "hsa:" + kegg] * "\t"}

  Open.write(t.name, header + "\n" + rows * "\n")
end
|
25
|
+
|
26
|
+
# Builds the `gene_drug` table: KEGG gene id => KEGG drug ids targeting it.
#
# The drug flat file is split into fields: a field starts on a line beginning
# with a capital letter and runs until the next such line. ENTRY fields set
# the current drug; TARGET fields contribute the gene ids found inside their
# `[HSA:...]` groups.
file :gene_drug => 'source/drugs' do |t|
  drugs_by_gene = {}
  drug = nil

  # The original pattern ended in (?:^[A-Z]), which CONSUMED the first letter
  # of the following field, so every other field was never matched. A
  # lookahead leaves that letter in place; \z lets the final field match too.
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  fields.each do |field|
    next unless field =~ /\A(?:ENTRY|TARGET)/

    # Drop a trailing record terminator and fold continuation lines into one
    # line (the original `sub` only collapsed the first whitespace run).
    field = field.sub(%r{^///.*}m, '').gsub(/\s+/, ' ')

    if field =~ /\AENTRY (\w+)/
      drug = $1
      next
    end

    next if drug.nil?

    # A TARGET field may list several targets; read every [HSA:...] group
    # rather than only the first one.
    field.scan(/\[HSA:(.*?)\]/).flatten.each do |ids|
      ids.split(/\s/).each do |gene|
        next if gene.empty?
        (drugs_by_gene[gene] ||= []) << drug
      end
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows   = drugs_by_gene.collect{|gene, drugs| ["hsa:" + gene, drugs.uniq * "|"] * "\t"}

  Open.write(t.name, header + "\n" + rows * "\n")
end
|
47
|
+
|
48
|
+
# Builds the `drugs` table: KEGG drug id, its names and its PubChem ids.
#
# The drug flat file is split into fields (a field starts on a line beginning
# with a capital letter and runs until the next such line). ENTRY sets the
# current drug, NAME contributes `;`-separated names, and DBLINKS contributes
# the PubChem identifier when present.
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  # The original pattern ended in (?:^[A-Z]), which consumed the first letter
  # of the following field; since NAME directly follows ENTRY it was skipped.
  # A lookahead keeps field boundaries intact; \z covers the last field.
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  fields.each do |field|
    next unless field =~ /\A(?:ENTRY|NAME|DBLINKS)/

    # Drop a trailing record terminator, then fold continuation lines so that
    # names and links listed on following lines are seen as well (the original
    # `sub` collapsed only the first whitespace run).
    field = field.sub(%r{^///.*}m, '').gsub(/\s+/, ' ')

    if field =~ /\AENTRY (\w+)/
      drug = $1
      next
    end

    next if drug.nil?

    if field =~ /\ANAME(.*)/
      $1.split(/;/).each do |name|
        name = name.strip
        next if name.empty?
        (info[drug] ||= [[], []])[0] << name
      end
    end

    if field =~ /\ADBLINKS(.*)/
      # \d+ (not \d*) so an empty identifier is never recorded.
      pubchem = $1[/PubChem: (\d+)/, 1]
      next unless pubchem
      (info[drug] ||= [[], []])[1] << pubchem
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  rows   = info.collect do |id, values|
    names, pubchems = values
    [id, names * "|", pubchems * "|"] * "\t"
  end

  Open.write(t.name, header + "\n" + rows * "\n")
end
|
77
|
+
|
78
|
+
# Builds the `pathways` table: pathway id, name and description.
#
# Each line is checked independently for an ENTRY, NAME and DESCRIPTION
# marker; NAME and DESCRIPTION values are stored under the most recently seen
# ENTRY id. Only pathways that have a name are written.
file :pathways => 'source/pathways' do |t|
  descriptions = {}
  names        = {}
  current      = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    current               = $1.strip if line =~ /ENTRY\s+(\w+)/
    names[current]        = $1.strip if line =~ /NAME (.*)/
    descriptions[current] = $1.strip if line =~ /DESCRIPTION (.*)/
  end

  header = ['#KEGG Pathway ID', 'Name', 'Description'] * "\t"
  rows   = names.keys.collect do |id|
    [id, names[id], descriptions[id]] * "\t"
  end

  Open.write(t.name, header + "\n" + rows * "\n")
end
|
98
|
+
|
99
|
+
# Builds the `gene_pathway` table from the KEGG gene map file.
# Every row is rendered by the `data` block: the gene id gets an "hsa:" prefix
# and each pathway code gets an "hsa" prefix, joined with "|".
process_tsv :gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ' do
  headers ['KEGG Gene ID', 'KEGG Pathways']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect{|code| "hsa" + code}
    "hsa:#{ gene }\t#{ pathway_ids * "|" }"
  end
end

# Hand-written file tasks that the default task should build as well.
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
|
108
|
+
|
109
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw Matador download: local file name (under source/) => remote URL.
define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"

# Builds the `protein_drug` table from the Matador dump.
#
# The :fix lambda removes the first "9606." prefix on each line. The dot is
# now escaped: the original /9606./ matched "9606" followed by ANY character,
# so it could strip five characters from an unrelated column instead. This
# matches the pattern used by the STITCH and STRING Rakefiles.
# NOTE(review): "9606." is presumably the human taxonomy prefix of the protein
# ids - confirm against the source file.
process_tsv :protein_drug, 'matador',
  :native => 3,
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :extra => [1,0,7,8,9,10,11,12],
  :header_hash => "",
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Matador ID', 'Score', 'Annotation', 'Mesh_Score', 'Mesh_Annotation', 'Matador_Score', 'Matador_Annotation']
end
|
15
|
+
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw NCI Cancer Index downloads: local file name (under source/) => remote URL.
nci_sources = {
  "cancer_gene" => "https://gforge.nci.nih.gov/frs/download.php/6819/NCI_CancerIndex_allphases_disease.zip",
  "gene_drug"   => "https://gforge.nci.nih.gov/frs/download.php/6821/NCI_CancerIndex_allphases_compound.zip"
}

define_source_tasks nci_sources
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
# Renders one output row for a gene.
#
# The four parallel lists are zipped; records sharing the same term are
# merged: the last concept code wins, and statements and PubMed ids are
# accumulated and joined with ";;". Terms are separated by "|".
def nci_association_row(gene, terms, concepts, statements, pmids)
  by_term = {}

  TSV.zip_fields([terms, concepts, statements, pmids]).each do |fields|
    term, concept, statement, pmid = fields
    by_term[term] ||= {:statements => [], :pmids => []}
    by_term[term][:concept] = concept
    by_term[term][:statements] << statement
    by_term[term][:pmids] << pmid
  end

  names  = by_term.keys
  merged = by_term.values_at(*names)

  [
    gene,
    names * "|",
    merged.collect{|v| v[:concept]} * "|",
    merged.collect{|v| v[:statements] * ";;"} * "|",
    merged.collect{|v| v[:pmids] * ";;"} * "|"
  ] * "\t"
end

# Scans an NCI Cancer Index file line by line and returns a hash
# gene symbol => output row.
#
# A <HUGOGeneSymbol> tag starts a new gene; values of the +term_tag+,
# +concept_tag+, <Statement> and <PubMedID> tags found afterwards belong to
# that gene.
def nci_gene_associations(path, term_tag, concept_tag)
  term_re    = /<#{term_tag}>(.*)<\/#{term_tag}>/
  concept_re = /<#{concept_tag}>(.*)<\/#{concept_tag}>/

  rows = {}
  gene = nil
  terms, concepts, statements, pmids = [], [], [], []

  Open.read(path).split(/\n/).each do |line|
    if line =~ /<HUGOGeneSymbol>(.*)<\/HUGOGeneSymbol>/
      next_gene = $1
      rows[gene] = nci_association_row(gene, terms, concepts, statements, pmids) if gene
      gene = next_gene
      terms, concepts, statements, pmids = [], [], [], []
    end

    terms      << $1 if line =~ term_re
    concepts   << $1 if line =~ concept_re
    statements << $1 if line =~ /<Statement>(.*)<\/Statement>/
    pmids      << $1 if line =~ /<PubMedID>(.*)<\/PubMedID>/
  end

  # Flush the final gene. The original only stored a gene when the NEXT gene
  # tag was met, so the last gene of each file was silently dropped.
  rows[gene] = nci_association_row(gene, terms, concepts, statements, pmids) if gene

  rows
end

# Writes the header line followed by every row to +target+.
def write_nci_table(target, header, rows)
  File.open(target, 'w') do |f|
    f.puts "#" + header * "\t"
    rows.each do |_gene, row|
      f.puts row
    end
  end
end

# Gene => drugs table (drug terms, concept codes, statements, PubMed ids).
file :gene_drug => 'source/gene_drug' do |t|
  rows = nci_gene_associations(t.prerequisites.first, 'DrugTerm', 'NCIDrugConceptCode')
  write_nci_table(t.name, ['Associated Gene Name', 'Drugs', 'Drug Concepts', 'Statements', 'PMIDS'], rows)
end

# Gene => diseases table (disease terms, concept codes, statements, PubMed ids).
file :gene_cancer => 'source/cancer_gene' do |t|
  rows = nci_gene_associations(t.prerequisites.first, 'MatchedDiseaseTerm', 'NCIDiseaseConceptCode')
  write_nci_table(t.name, ['Associated Gene Name', 'Diseases', 'Disease Concepts', 'Statements', 'PMIDS'], rows)
end

# Both tables are built by the default task.
add_to_defaults [:gene_cancer, :gene_drug]
|
137
|
+
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw PharmGKB downloads: local file name (under source/) => remote URL.
pharmgkb_sources = {
  "genes"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
}

define_source_tasks pharmgkb_sources
|
9
|
+
|
10
|
+
|
11
|
+
# Line clean-up shared by the three entity tables below: the separator `","`
# between quoted values becomes "|", remaining double quotes are removed, and
# a comma directly before a tab or the end of the line is dropped.
unquote_values = proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')}

# Diseases table; only the first column header is renamed.
process_tsv :diseases, 'diseases', :header_hash => "", :fix => unquote_values do
  headers ['PhGKB Disease ID']
end

# Genes table; the first six column headers are renamed.
process_tsv :genes, 'genes', :header_hash => "", :fix => unquote_values do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

# Drugs table restricted to the 'Name' and 'DrugBank Id' extra columns.
process_tsv :drugs, 'drugs',
  :header_hash => "",
  :extra => ['Name', 'DrugBank Id'],
  :fix => unquote_values do
  headers ['PhGKB Drug ID', 'Name', 'DrugBank ID', ]
end
|
29
|
+
|
30
|
+
|
31
|
+
# Removes the entity-type prefixes ("Gene:", "Drug:", "Disease:") from a line.
#
# The original pattern was /Gene:|Drug:|Disease/ - without the colon after
# "Disease". That left a stray ":" in front of every disease identifier and
# also deleted the word "Disease" from any free text on the line. The :select
# filters below match "Disease:" with its colon, which is the prefix actually
# present in the data.
strip_entity_prefix = proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')}

# Gene => drug relationships: lines mentioning both a gene and a drug.
process_tsv :gene_drug, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Drug:/},
  :native => 'Entity1_id',
  :extra => ['Entity2_id','Relationship'],
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'Drug']
end

# Gene => disease relationships: lines mentioning both a gene and a disease.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Disease:/},
  :native => 1,
  :extra => 3,
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end

# Variant annotations keyed by the column at position 1.
process_tsv :variants, 'variants',
  :native => 1,
  :extra => [3,7,8,9,10,4,5],
  :header_hash => "",
  :fix => strip_entity_prefix,
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Evidence']
end
|
62
|
+
|
63
|
+
# Builds the `pathways` table: one row per line shaped like
# "PA<digits>: <name> - (<source>)"; every other line is ignored.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Name','Source'] * "\t"

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      header = /(PA\d+): (.*) - \((.*)\)/.match(line)
      out.puts header.captures * "\t" if header
    end
  end
end
|
72
|
+
|
73
|
+
# Parses the PharmGKB pathways file and returns, in file order, a hash
#   pathway id => {:name => String, :members => [String, ...]}
# holding the names of the members whose type column equals +member_type+.
#
# A pathway header is a line starting with "PA<digits>:" (the same id shape
# the :pathways task matches). The original pattern /(P.*):(.*)/ was greedy
# and unanchored: any member line containing a "P" followed later by a ":"
# was taken for a header, and a colon inside a pathway name moved part of the
# name into the id. The name is also stripped of its leading blank.
def phgkb_pathway_members(path, member_type)
  pathways = {}
  current  = nil

  Open.read(path).split(/\n/).each do |line|
    if line =~ /\A\s*(PA\d+):(.*)/
      current = $1
      pathways[current] = {:name => $2.strip, :members => []}
    else
      type, _code, name = line.split(/\t/)
      # Member lines met before any header have no pathway to attach to; the
      # original raised NoMethodError on nil in that case.
      next unless type == member_type && current
      pathways[current][:members] << name
    end
  end

  pathways
end

# Writes one row per pathway having at least one member of +member_type+:
# pathway id, pathway name and the "|"-joined member names.
def write_phgkb_pathway_members(target, source, member_type, header)
  File.open(target, 'w') do |f|
    f.puts "#" + header * "\t"
    phgkb_pathway_members(source, member_type).each do |pathway, info|
      next if info[:members].empty?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:members] * "|"}"
    end
  end
end

# Pathway => genes.
file :gene_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t.name, t.prerequisites.first, 'Gene', ['Pathway ID', 'Name', 'Associated Gene Name'])
end

# Pathway => drugs.
file :drug_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t.name, t.prerequisites.first, 'Drug', %w(ID Name Drugs))
end

# Pathway => diseases.
file :disease_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t.name, t.prerequisites.first, 'Disease', %w(ID Name Diseases))
end

# NOTE(review): the :pathways file task is not in this list, so the default
# task does not build it - confirm whether that is intended.
add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw STITCH downloads: local file name (under source/) => remote URL.
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"

# Builds the `protein_chemical` table from the lines containing "9606.",
# with that prefix removed by the :fix lambda.
#
# The :grep pattern is single-quoted on purpose: in the original double-quoted
# "9606\." Ruby drops the backslash, so the pattern became "9606." and the dot
# matched any character. The STRING Rakefile already uses the single-quoted
# form.
process_tsv :protein_chemical, 'protein_chemicals',
  :native => 1,
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
|
14
|
+
|
15
|
+
# Patterns used to filter the chemical aliases file. The array starts empty
# and is filled IN PLACE (Array#replace) when the task below runs, so the
# object handed to :grep at declaration time is the one that gets populated.
$grep_re = []

# Builds the `chemicals` table. The block first makes sure the
# protein_chemical table exists, reads its keys and uses them as the grep
# patterns. Statement order matters here and is kept as in the original.
process_tsv :chemicals, 'chemicals', :grep => $grep_re, :native => 0 do
  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  chemical_ids = TSV.new('protein_chemical', :native => 1, :other => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace chemical_ids

  # NOTE(review): these headers are the same as for protein_chemical; check
  # that they really describe the aliases file.
  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
|
30
|
+
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Raw STRING download: local file name (under source/) => remote URL.
define_source_tasks "protein_protein" => "http://string-db.org:8080/newstring_download/protein.links.v8.3.txt.gz"

# Builds the `protein_protein` table from the lines matching "9606\.ENSP",
# removing every "9606." prefix from them.
process_tsv :protein_protein, 'protein_protein',
  :grep => '9606\.ENSP',
  :fix => lambda{|l| l.gsub(/9606\./,'')} do

  headers ['Ensembl Protein ID', 'Ensembl Protein ID', 'Score']
end
|
8
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '../../../lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
|
4
|
+
require 'rbbt/util/tsv'
|
5
|
+
require 'rbbt/util/open'
|
6
|
+
require 'rbbt/util/log'
|
7
|
+
|
8
|
+
# Directory (relative to the Rakefile) where raw downloads are stored.
SOURCE_DIR = 'source'

# Declares one Rake file task per entry of +sources+.
#
# sources - Hash mapping a local file name to the URL it is downloaded from.
#
# Each task creates SOURCE_DIR when needed, logs the retrieval and writes the
# content opened from the URL into SOURCE_DIR/<name>.
def define_source_tasks(sources)
  sources.each do |name, url|
    file File.join(SOURCE_DIR, name) do |t|
      # mkdir_p is a no-op when the directory already exists, which replaces
      # the `File.exists?` guard (that method was removed in Ruby 3.2).
      FileUtils.mkdir_p SOURCE_DIR
      Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
      Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
    end
  end
end
|
18
|
+
|
19
|
+
# Column headers declared by the process_tsv block currently being evaluated.
$__headers = nil

# DSL helper: records +values+ as the headers for the table being built.
# Returns +values+.
def headers(values)
  $__headers = values
end
|
23
|
+
|
24
|
+
# Row formatter declared by the process_tsv block currently being evaluated.
$__data = nil

# DSL helper: stores the given block as the row formatter. process_tsv calls
# it with (key, values) for every entry. Without a block, nil is stored.
def data(&block)
  $__data = block
end
|
28
|
+
|
29
|
+
# Names of the tables declared through process_tsv, in declaration order.
$__tsv_tasks = []

# Accessor returning the list above (the array itself, not a copy).
def tsv_tasks
  $__tsv_tasks
end
|
33
|
+
|
34
|
+
# Extra file tasks (besides the process_tsv ones) built by the default task.
$__files = []

# Adds the task names in +list+ to the tasks built by the default task.
#
# The original assigned `$__files = list`, so a second call discarded what an
# earlier call had registered, contrary to what the name says. Names are now
# accumulated (without duplicates); a single call yields the same list as
# before.
#
# Returns the accumulated list.
def add_to_defaults(list)
  $__files |= Array(list)
end
|
38
|
+
|
39
|
+
# Declares a Rake file task that turns SOURCE_DIR/<source> into the table
# named +file+.
#
# file    - name of the table (and of the file written).
# source  - file name, under SOURCE_DIR, of the raw download it depends on.
# options - options handed unchanged to TSV.new when reading the source.
# block   - optional DSL block evaluated when the task runs; it may call
#           `headers` to name columns and `data` to install a row formatter.
#
# When the task runs, the source is loaded with TSV.new. If the loaded table
# has field names, they are merged position by position with the declared
# headers (a declared header wins). The headers, when known, are written as a
# "#"-prefixed line, followed by one line per entry. Rows that are Arrays are
# tab-joined, with Array cells joined by "|"; any other row is written as is.
def process_tsv(file, source, options = {}, &block)

  $__tsv_tasks << file

  file(file => File.join(SOURCE_DIR, source)) do |t|
    # Start from a clean DSL state so that headers or a formatter declared by
    # a previously run task cannot leak into this one.
    $__headers = nil
    $__data    = nil
    block.call if block
    declared_headers = $__headers
    formatter        = $__data

    d = TSV.new(t.prerequisites.first, options)

    if d.fields
      data_fields = d.fields.dup.unshift d.key_field
      declared_headers =
        if declared_headers.nil?
          data_fields
        else
          data_fields.zip(declared_headers).collect{|pair| pair.compact.last}
        end
    end

    File.open(t.name.to_s, 'w') do |f|
      f.puts "#" + declared_headers * "\t" if declared_headers

      d.each do |key, values|
        line = formatter.nil? ? [key, *values] : formatter.call(key, values)

        if Array === line
          # The first element is the key and is written untouched.
          first, *rest = line
          cells = rest.collect{|elem| Array === elem ? elem * "|" : elem }
          f.puts cells.unshift(first) * "\t"
        else
          f.puts line
        end
      end
    end
  end
end
|
84
|
+
|
85
|
+
# Builds every table declared with process_tsv plus the files registered
# through add_to_defaults.
task :default do |t|
  ($__tsv_tasks + $__files).each{|name| Rake::Task[name].invoke }
end

task :all => :default

# Removes the generated tables that exist (the raw downloads are kept).
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
task :clean do
  ($__tsv_tasks + $__files).each do |name|
    path = name.to_s
    FileUtils.rm path if File.exist?(path)
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/cancer'
|
5
|
+
|
6
|
+
# Checks that the annotations table lists the expected tumor type for a
# known gene.
class TestCancer < Test::Unit::TestCase
  def test_anais_annotations
    annotations = TSV.new(Cancer.anais_annotations)
    tumor_types = annotations['ENSG00000087460']['Tumor Type']

    assert(tumor_types.include?('Adrenocortical'))
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/matador'
|
5
|
+
|
6
|
+
# Checks the first chemical listed for a known protein in the Matador table.
class TestMatador < Test::Unit::TestCase
  def test_matador
    protein_drug = TSV.new(Matador.protein_drug)
    chemicals    = protein_drug['ENSP00000343023']['Chemical']

    assert_equal('procainamide', chemicals.first)
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/pharmagkb'
|
5
|
+
|
6
|
+
# Checks that a known variant is associated with the expected gene name.
class TestPhGKB < Test::Unit::TestCase
  def test_phgkb
    variants   = TSV.new(PharmaGKB.variants)
    gene_names = variants['rs25487']['Associated Gene Name']

    assert(gene_names.include?('XRCC1'))
  end
end
|
11
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'test/unit'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'rbbt/sources/stitch'
|
5
|
+
|
6
|
+
# Checks that the chemicals table has at least one truthy key.
class TestSTITCH < Test::Unit::TestCase
  def test_stitch
    chemical_keys = TSV.new(STITCH.chemicals).keys

    assert(chemical_keys.any?)
  end
end
|
11
|
+
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbbt-phgx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Miguel Vazquez
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-10 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Pharmaco-genomics related data sources
|
23
|
+
email: miguel.vazquez@fdi.ucm.es
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files:
|
29
|
+
- LICENSE
|
30
|
+
files:
|
31
|
+
- LICENSE
|
32
|
+
- lib/phgx.rb
|
33
|
+
- lib/rbbt/sources/cancer.rb
|
34
|
+
- lib/rbbt/sources/kegg.rb
|
35
|
+
- lib/rbbt/sources/matador.rb
|
36
|
+
- lib/rbbt/sources/nci.rb
|
37
|
+
- lib/rbbt/sources/pharmagkb.rb
|
38
|
+
- lib/rbbt/sources/stitch.rb
|
39
|
+
- lib/rbbt/sources/string.rb
|
40
|
+
- share/install/KEGG/Rakefile
|
41
|
+
- share/install/Matador/Rakefile
|
42
|
+
- share/install/NCI/Rakefile
|
43
|
+
- share/install/PharmaGKB/Rakefile
|
44
|
+
- share/install/STITCH/Rakefile
|
45
|
+
- share/install/STRING/Rakefile
|
46
|
+
- share/install/lib/rake_helper.rb
|
47
|
+
- test/rbbt/sources/test_cancer.rb
|
48
|
+
- test/rbbt/sources/test_matador.rb
|
49
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
50
|
+
- test/rbbt/sources/test_stitch.rb
|
51
|
+
- test/test_helper.rb
|
52
|
+
has_rdoc: true
|
53
|
+
homepage: http://github.com/mikisvaz/rbbt-phgx
|
54
|
+
licenses: []
|
55
|
+
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
requirements: []
|
80
|
+
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 1.3.7
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
86
|
+
test_files:
|
87
|
+
- test/rbbt/sources/test_cancer.rb
|
88
|
+
- test/rbbt/sources/test_matador.rb
|
89
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
90
|
+
- test/rbbt/sources/test_stitch.rb
|
91
|
+
- test/test_helper.rb
|