rbbt-phgx 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/lib/phgx.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/util/pkg_config'
3
+ require 'rbbt/util/pkg_data'
4
+ require 'rbbt/util/pkg_software'
5
+ require 'rbbt/util/open'
6
+ require 'rbbt/util/tmpfile'
7
+ require 'rbbt/util/filecache'
8
+
9
# Top-level namespace of the rbbt-phgx package.
#
# The module extends itself with PKGConfig, PKGData and PKGSoftware and then
# calls load_cfg with the 'datadir' key and a default configuration text.
module PhGx
  extend PKGConfig
  extend PKGData
  extend PKGSoftware

  # Configuration text pointing datadir at $HOME/phgx/data.
  # NOTE(review): presumably only used when no configuration exists yet -
  # confirm against PKGConfig.load_cfg.
  default_cfg = "datadir: #{File.join(ENV['HOME'], 'phgx', 'data')}\n"

  load_cfg(%w(datadir), default_cfg)
end
16
+
@@ -0,0 +1,12 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Cancer annotation data source backed by the PhGx package.
module Cancer
  # Register the two Cancer data files with PhGx.
  PhGx.add_datafiles :anais_annotations => ['Cancer', 'Cancer/anais-annotations.txt'],
                     :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']

  # PKG is assigned before DataModule is extended, as in the other sources.
  PKG = PhGx
  extend DataModule
end

# When this file is executed directly, run Cancer.all. This used to call
# NCI.all, a module this file neither defines nor requires.
if __FILE__ == $0 then Cancer.all end
@@ -0,0 +1,9 @@
1
+ require 'rbbt/util/data_module'
2
+ require 'phgx'
3
+
4
# KEGG data source backed by the PhGx package.
module KEGG
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls KEGG.all.
KEGG.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'rbbt/util/data_module'
2
+ require 'phgx'
3
+
4
# Matador data source backed by the PhGx package.
module Matador
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls Matador.all.
Matador.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# NCI data source backed by the PhGx package.
module NCI
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls NCI.all.
NCI.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# PharmaGKB data source backed by the PhGx package.
module PharmaGKB
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls PharmaGKB.all.
PharmaGKB.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# STITCH data source backed by the PhGx package.
module STITCH
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls STITCH.all.
STITCH.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# STRING data source backed by the PhGx package.
module STRING
  # Assigned before DataModule is extended.
  # NOTE(review): presumably DataModule reads PKG - confirm.
  PKG = PhGx
  extend DataModule
end

# Executing this file directly calls STRING.all.
STRING.all if __FILE__ == $0
@@ -0,0 +1,109 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw KEGG downloads. Each key is the file name the download gets under the
# source directory; each value is the URL it is fetched from.
kegg_sources = {
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
}

define_source_tasks kegg_sources
7
+
8
+
9
# Builds the 'genes' file from the KEGG H. sapiens gene dump.
#
# Every ENTRY line sets the current KEGG gene; an "Ensembl: ENSG..." line
# seen afterwards pairs that gene with its Ensembl id. The output has one
# row per KEGG gene: "<Ensembl Gene ID>\thsa:<KEGG entry>".
file :genes => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # each_line rather than String#each: the latter no longer exists from
  # Ruby 1.9 on, while each_line works on every version.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    pairs[entry] = $1 if line =~ /Ensembl: (ENSG\d+)/
  end

  header = ['#Ensembl Gene ID', 'KEGG Gene ID'] * "\t"
  rows   = pairs.collect { |kegg_id, ensembl_id| [ensembl_id, "hsa:" + kegg_id] * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
25
+
26
# Builds the 'gene_drug' file from the KEGG drug dump: for every human gene
# listed in a drug's TARGET field, the KEGG drugs that target it.
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  # Cut the dump into fields: a field starts on a line beginning with a
  # capital letter and runs up to the next such line (or the end of the text).
  # The boundary is a lookahead so the first letter of the next field is not
  # consumed; consuming it made the scan skip every other field. Only the
  # /m flag is used: in Ruby /m makes '.' match newlines, while /s selects an
  # encoding.
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  fields.select { |field| field =~ /^ENTRY|TARGET/ }.collect { |field| field.sub(/\s+/, ' ') }.each do |line|
    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    # NOTE(review): '.' does not cross newlines here, so only an [HSA:...]
    # group on the first TARGET line is read - confirm that is intended.
    if line =~ /TARGET.*?\[HSA:(.*?)\]/
      $1.split(/\s/).each do |gene|
        pairs[gene] ||= []
        pairs[gene] << drug
      end
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows   = pairs.collect { |gene, drugs| ["hsa:" + gene, drugs * "|"] * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
47
+
48
# Builds the 'drugs' file from the KEGG drug dump: for every drug entry, its
# names and its PubChem ids.
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  # Cut the dump into fields: a field starts on a line beginning with a
  # capital letter and runs up to the next such line (or the end of the text).
  # The boundary is a lookahead so the first letter of the next field is not
  # consumed; consuming it made the scan skip every other field. Only the
  # /m flag is used: in Ruby /m makes '.' match newlines, while /s selects an
  # encoding.
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  fields.select { |field| field =~ /^ENTRY|NAME|DBLINKS/ }.collect { |field| field.sub(/\s+/, ' ') }.each do |line|
    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    # NOTE(review): '.' does not cross newlines in the two patterns below, so
    # only the first line of a NAME or DBLINKS field is inspected - confirm.
    if line =~ /^NAME(.*)/
      $1.split(/;/).each do |name|
        info[drug] ||= [[], []]
        info[drug][0] << name.chomp.strip
      end
    end

    if line =~ /^DBLINKS(.*)/
      pubchem = $1[/PubChem: (\d*)/, 1]
      next unless pubchem
      info[drug] ||= [[], []]
      info[drug][1] << pubchem.chomp.strip
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  rows   = info.collect { |drug_id, values| [drug_id, values.collect { |v| v * "|" }].flatten * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
77
+
78
# Builds the 'pathways' file from the KEGG pathway dump: one row per pathway
# that has a NAME line, with its id, name and (when present) description.
file :pathways => 'source/pathways' do |t|
  names = {}
  descs = {}
  pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # The three checks are independent; each one looks at every line.
    pathway        = $1.strip if line =~ /ENTRY\s+(\w+)/
    names[pathway] = $1.strip if line =~ /NAME (.*)/
    descs[pathway] = $1.strip if line =~ /DESCRIPTION (.*)/
  end

  header = ['#KEGG Pathway ID', 'Name', 'Description'] * "\t"
  rows   = names.keys.collect { |id| [id, names[id], descs[id]] * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
98
+
99
# Gene to pathway table built from hsa_gene_map.tab. Gene ids get the
# "hsa:" prefix and every pathway id the "hsa" prefix; pathways are joined
# with "|".
process_tsv(:gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ') do
  headers ['KEGG Gene ID', 'KEGG Pathways']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect { |name| "hsa" + name }
    "hsa:#{ gene }\t#{ pathway_ids * "|" }"
  end
end

# Hand-written file tasks that the default task should build as well.
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
108
+
109
+
@@ -0,0 +1,15 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw Matador download, stored under the source directory as 'matador'.
define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"

# Protein to drug table. The :fix lambda removes the "9606." prefix from each
# line; the dot is escaped so that only a literal "9606." is removed, matching
# the STITCH and STRING Rakefiles (the unescaped /9606./ matched any character
# after "9606").
process_tsv :protein_drug, 'matador',
  :native => 3,
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :extra => [1,0,7,8,9,10,11,12],
  :header_hash => "",
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Matador ID', 'Score', 'Annotation', 'Mesh_Score', 'Mesh_Annotation', 'Matador_Score', 'Matador_Annotation']
end
15
+
@@ -0,0 +1,137 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw NCI Cancer Index downloads. Each key is the file name the download
# gets under the source directory.
nci_sources = {
  "cancer_gene" => "https://gforge.nci.nih.gov/frs/download.php/6819/NCI_CancerIndex_allphases_disease.zip",
  "gene_drug"   => "https://gforge.nci.nih.gov/frs/download.php/6821/NCI_CancerIndex_allphases_compound.zip"
}

define_source_tasks nci_sources
5
+
6
+
7
+
8
# Builds the 'gene_drug' table from the NCI compound file.
#
# The file is read line by line. A <HUGOGeneSymbol> line starts a new gene;
# the <DrugTerm>, <NCIDrugConceptCode>, <Statement> and <PubMedID> values seen
# until the next gene are collected and grouped by drug. Each gene becomes one
# row: gene, drugs, concepts, statements and PMIDs, with "|" between drugs and
# ";;" between the statements/PMIDs of one drug.
file :gene_drug => 'source/gene_drug' do |t|
  entry      = nil
  drugs      = []
  concepts   = []
  statements = []
  pmids      = []
  info       = {}

  # Turns everything collected for the current gene into its output row.
  store_entry = lambda do
    unless entry.nil?
      drug_info = {}
      TSV.zip_fields([drugs, concepts, statements, pmids]).each do |drug_fields|
        drug, concept, statement, pmid = drug_fields
        drug_info[drug] ||= {:statements => [], :pmids => []}
        drug_info[drug][:concept] = concept
        drug_info[drug][:statements] << statement
        drug_info[drug][:pmids] << pmid
      end

      names   = drug_info.keys
      records = drug_info.values_at(*names)
      info[entry] = [
        entry,
        names * "|",
        records.collect { |v| v[:concept] } * "|",
        records.collect { |v| v[:statements] * ";;" } * "|",
        records.collect { |v| v[:pmids] * ";;" } * "|"
      ] * "\t"
    end
  end

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /<HUGOGeneSymbol>(.*)<\/HUGOGeneSymbol>/
      symbol = $1
      store_entry.call
      entry = symbol
      drugs, concepts, statements, pmids = [], [], [], []
    end

    # The original appended with `<< $1 || ""`; the `|| ""` part never applied
    # because `<<` binds tighter, and $1 is never nil after a match.
    drugs      << $1 if line =~ /<DrugTerm>(.*)<\/DrugTerm>/
    concepts   << $1 if line =~ /<NCIDrugConceptCode>(.*)<\/NCIDrugConceptCode>/
    statements << $1 if line =~ /<Statement>(.*)<\/Statement>/
    pmids      << $1 if line =~ /<PubMedID>(.*)<\/PubMedID>/
  end

  # Store the last gene too: rows used to be written only when the next gene
  # started, so the final gene of the file was lost.
  store_entry.call

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['Associated Gene Name', 'Drugs', 'Drug Concepts', 'Statements', 'PMIDS'] * "\t"
    info.each do |_gene, row|
      f.puts row
    end
  end
end
71
+
72
# Builds the 'gene_cancer' table from the NCI disease file.
#
# The file is read line by line. A <HUGOGeneSymbol> line starts a new gene;
# the <MatchedDiseaseTerm>, <NCIDiseaseConceptCode>, <Statement> and
# <PubMedID> values seen until the next gene are collected and grouped by
# disease. Each gene becomes one row: gene, diseases, concepts, statements and
# PMIDs, with "|" between diseases and ";;" between the statements/PMIDs of
# one disease.
file :gene_cancer => 'source/cancer_gene' do |t|
  entry      = nil
  diseases   = []
  concepts   = []
  statements = []
  pmids      = []
  info       = {}

  # Turns everything collected for the current gene into its output row.
  store_entry = lambda do
    unless entry.nil?
      disease_info = {}
      TSV.zip_fields([diseases, concepts, statements, pmids]).each do |disease_fields|
        disease, concept, statement, pmid = disease_fields
        disease_info[disease] ||= {:statements => [], :pmids => []}
        disease_info[disease][:concept] = concept
        disease_info[disease][:statements] << statement
        disease_info[disease][:pmids] << pmid
      end

      names   = disease_info.keys
      records = disease_info.values_at(*names)
      info[entry] = [
        entry,
        names * "|",
        records.collect { |v| v[:concept] } * "|",
        records.collect { |v| v[:statements] * ";;" } * "|",
        records.collect { |v| v[:pmids] * ";;" } * "|"
      ] * "\t"
    end
  end

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /<HUGOGeneSymbol>(.*)<\/HUGOGeneSymbol>/
      symbol = $1
      store_entry.call
      entry = symbol
      diseases, concepts, statements, pmids = [], [], [], []
    end

    # The original appended with `<< $1 || ""`; the `|| ""` part never applied
    # because `<<` binds tighter, and $1 is never nil after a match.
    diseases   << $1 if line =~ /<MatchedDiseaseTerm>(.*)<\/MatchedDiseaseTerm>/
    concepts   << $1 if line =~ /<NCIDiseaseConceptCode>(.*)<\/NCIDiseaseConceptCode>/
    statements << $1 if line =~ /<Statement>(.*)<\/Statement>/
    pmids      << $1 if line =~ /<PubMedID>(.*)<\/PubMedID>/
  end

  # Store the last gene too: rows used to be written only when the next gene
  # started, so the final gene of the file was lost.
  store_entry.call

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['Associated Gene Name', 'Diseases', 'Disease Concepts', 'Statements', 'PMIDS'] * "\t"
    info.each do |_gene, row|
      f.puts row
    end
  end
end

# Hand-written file tasks that the default task should build.
add_to_defaults [:gene_cancer, :gene_drug]
137
+
@@ -0,0 +1,149 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw PharmGKB downloads. Each key is the file name the download gets under
# the source directory.
pharmgkb_sources = {
  "genes"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
}

define_source_tasks pharmgkb_sources
9
+
10
+
11
# Line clean-up shared by the three entity tables below: a `","` separator
# becomes "|", remaining double quotes are dropped, and a comma directly
# before a tab or the end of the line is removed.
strip_quoted_lists = proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')}

# Diseases table.
process_tsv(:diseases, 'diseases', :header_hash => "", :fix => strip_quoted_lists) do
  headers ['PhGKB Disease ID']
end

# Genes table.
process_tsv(:genes, 'genes', :header_hash => "", :fix => strip_quoted_lists) do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

# Drugs table; only the 'Name' and 'DrugBank Id' columns are kept as extras.
process_tsv(:drugs, 'drugs', :header_hash => "", :extra => ['Name', 'DrugBank Id'], :fix => strip_quoted_lists) do
  headers ['PhGKB Drug ID', 'Name', 'DrugBank ID', ]
end
29
+
30
+
31
# Removes the "Gene:", "Drug:" and "Disease:" prefixes from a line.
# The original pattern listed "Disease" without its colon, so an id such as
# "Disease:PA1" came out as ":PA1" while gene and drug ids came out clean.
# NOTE(review): confirm against the relationships/variants files that no
# consumer relies on the leading ":".
strip_entity_prefixes = proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')}

# Gene to drug relationships: only lines mentioning both a gene and a drug.
process_tsv :gene_drug, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Drug:/},
  :native => 'Entity1_id',
  :extra => ['Entity2_id','Relationship'],
  :header_hash => "",
  :fix => strip_entity_prefixes,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'Drug']
end

# Gene to disease relationships: only lines mentioning both a gene and a disease.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Disease:/},
  :native => 1,
  :extra => 3,
  :header_hash => "",
  :fix => strip_entity_prefixes,
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end

# Variant annotations.
process_tsv :variants, 'variants',
  :native => 1,
  :extra => [3,7,8,9,10,4,5],
  :header_hash => "",
  :fix => strip_entity_prefixes,
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Evidence']
end
62
+
63
# Builds the 'pathways' table: one row (id, name, source) for every line of
# the pathways file shaped like "PA<digits>: <name> - (<source>)".
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Name','Source'] * "\t"

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      header = line.match(/(PA\d+): (.*) - \((.*)\)/)
      out.puts header.captures * "\t" if header
    end
  end
end
72
+
73
# Builds the 'gene_pathway' table: for every pathway of the pathways file
# that lists at least one 'Gene' row, its id, name and gene names.
file :gene_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # A line matching "P...:..." opens a new pathway.
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
      next
    end

    # Any other line is a tab separated member row; only genes are kept.
    type, _code, name = line.split(/\t/)
    (pathways[last_pathway][:genes] ||= []) << name if type == 'Gene'
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + ['Pathway ID', 'Name', 'Associated Gene Name'] * "\t"
    pathways.each do |pathway, info|
      genes = info[:genes]
      out.puts "#{ pathway }\t#{info[:name]}\t#{genes * "|"}" unless genes.nil?
    end
  end
end
97
+
98
# Builds the 'drug_pathway' table: for every pathway of the pathways file
# that lists at least one 'Drug' row, its id, name and drug names.
file :drug_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # A line matching "P...:..." opens a new pathway.
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
      next
    end

    # Any other line is a tab separated member row; only drugs are kept.
    type, _code, name = line.split(/\t/)
    (pathways[last_pathway][:drugs] ||= []) << name if type == 'Drug'
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + %w(ID Name Drugs) * "\t"
    pathways.each do |pathway, info|
      drugs = info[:drugs]
      out.puts "#{ pathway }\t#{info[:name]}\t#{drugs * "|"}" unless drugs.nil?
    end
  end
end
122
+
123
+
124
# Builds the 'disease_pathway' table: for every pathway of the pathways file
# that lists at least one 'Disease' row, its id, name and disease names.
file :disease_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # A line matching "P...:..." opens a new pathway.
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
      next
    end

    # Any other line is a tab separated member row; only diseases are kept.
    type, _code, name = line.split(/\t/)
    (pathways[last_pathway][:diseases] ||= []) << name if type == 'Disease'
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + %w(ID Name Diseases) * "\t"
    pathways.each do |pathway, info|
      diseases = info[:diseases]
      out.puts "#{ pathway }\t#{info[:name]}\t#{diseases * "|"}" unless diseases.nil?
    end
  end
end

# Hand-written file tasks that the default task should build as well.
add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
@@ -0,0 +1,30 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw STITCH downloads, stored under the source directory by key name.
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"

# Protein to chemical links restricted to lines containing "9606."; the
# :fix lambda then removes that prefix.
# The grep pattern is single-quoted so the backslash survives: in double
# quotes "9606\." is just "9606.", whose dot matches any character. This also
# matches the quoting used in the STRING Rakefile.
process_tsv :protein_chemical, 'protein_chemicals',
  :native => 1,
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end

# Filled in while the :chemicals task runs; the same array object is handed
# to process_tsv as its :grep option, which is why it is updated with replace.
$grep_re = []
process_tsv :chemicals, 'chemicals',
  :grep => $grep_re,
  :native => 0 do

  # The chemical list comes from the protein_chemical table, so build it first.
  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  chemicals = TSV.new('protein_chemical', :native => 1, :other => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace chemicals

  # NOTE(review): these are the same headers as the protein_chemical table;
  # confirm they are right for the chemical aliases file.
  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
30
+
@@ -0,0 +1,8 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw STRING download, stored under the source directory as 'protein_protein'.
string_sources = {
  "protein_protein" => "http://string-db.org:8080/newstring_download/protein.links.v8.3.txt.gz"
}
define_source_tasks string_sources

# Protein to protein links restricted to lines matching '9606\.ENSP'; every
# "9606." prefix is removed from the kept lines.
process_tsv(:protein_protein, 'protein_protein',
            :grep => '9606\.ENSP',
            :fix => lambda{|l| l.gsub(/9606\./,'')}) do
  headers ['Ensembl Protein ID', 'Ensembl Protein ID', 'Score']
end
8
+
@@ -0,0 +1,93 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '../../../lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'rbbt/util/tsv'
5
+ require 'rbbt/util/open'
6
+ require 'rbbt/util/log'
7
+
8
# Directory, relative to the Rakefile, where raw downloads are stored.
SOURCE_DIR = 'source'

# Defines one file task per download.
#
# sources - Hash mapping a file name to the URL it is retrieved from. The
#           task for each entry is named SOURCE_DIR/<name> and, when run,
#           creates SOURCE_DIR if needed and writes the content read from the
#           URL into that file.
def define_source_tasks(sources)
  sources.each do |name, url|
    file(File.join(SOURCE_DIR, name)) do |t|
      # mkdir_p is a no-op when the directory exists. It replaces the
      # File.exists? check, a method removed in Ruby 3.2.
      FileUtils.mkdir_p SOURCE_DIR
      Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
      Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
    end
  end
end
18
+
19
# Header names declared for the table currently being processed.
$__headers = nil

# Declares the header names of the table being processed.
#
# values - Array of column names.
def headers(values)
  $__headers = values
end

# Block that formats one row of the table currently being processed.
$__data = nil

# Declares the row formatter of the table being processed. process_tsv calls
# the block with the key and the values of each row.
def data(&block)
  $__data = block
end

# Names of the tasks defined through process_tsv.
$__tsv_tasks = []

# Returns the names of the tasks defined through process_tsv.
def tsv_tasks
  $__tsv_tasks
end

# Extra task names the default and clean tasks handle besides the tsv tasks.
$__files = []

# Adds task names to the list built by the default task.
#
# list - Array of task names.
#
# The names are appended, so several calls accumulate; the previous
# implementation replaced the whole list on every call despite its name.
def add_to_defaults(list)
  $__files.concat(list)
end
38
+
39
# Defines a file task that turns a downloaded source into a TSV file.
#
# file    - name of the task and of the file it writes.
# source  - name of the prerequisite file inside SOURCE_DIR.
# options - Hash passed unchanged to TSV.new when the source is read.
# block   - run at the start of the task; it may call `headers` and `data`
#           to set the header names and the row formatter.
#
# The header line is "#" followed by the tab separated headers. Names given
# through `headers` replace, position by position, the key and field names
# read from the source. Without a `data` block a row is the key followed by
# its values; array values are joined with "|". When `data` returns something
# other than an Array it is written as is.
def process_tsv(file, source, options = {}, &block)
  $__tsv_tasks << file

  file(file => File.join(SOURCE_DIR, source)) do |t|
    # Start clean: without this, the headers and the row formatter set by a
    # previously executed task leaked into this one.
    $__headers = nil
    $__data = nil
    block.call if block

    d = TSV.new(t.prerequisites.first, options)

    if d.fields
      data_fields = d.fields.dup.unshift d.key_field
      $__headers =
        if $__headers.nil?
          data_fields
        else
          # Prefer the declared name; fall back to the one from the source.
          data_fields.zip($__headers).collect { |names| names.compact.last }
        end
    end

    File.open(t.name.to_s, 'w') do |f|
      f.puts "#" + $__headers * "\t" if $__headers

      d.each do |key, values|
        line = $__data.nil? ? values.unshift(key) : $__data.call(key, values)

        if Array === line
          # The first element is written as is; only the rest is joined.
          row_key = line.shift
          fields = line.collect { |elem| Array === elem ? elem * "|" : elem }
          fields.unshift row_key
          f.puts fields * "\t"
        else
          f.puts line
        end
      end
    end
  end
end
84
+
85
# Builds every table: the tasks defined through process_tsv followed by the
# ones registered with add_to_defaults.
task :default do |t|
  ($__tsv_tasks + $__files).each { |file| Rake::Task[file].invoke }
end

# Alias of the default task.
task :all => :default

# Removes the generated tables that exist.
task :clean do
  ($__tsv_tasks + $__files).each do |file|
    path = file.to_s
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    FileUtils.rm path if File.exist?(path)
  end
end
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/cancer'
5
+
6
# Checks the Cancer annotations data file.
class TestCancer < Test::Unit::TestCase
  # The 'Tumor Type' of ENSG00000087460 must include 'Adrenocortical'.
  def test_anais_annotations
    annotations = TSV.new(Cancer.anais_annotations)
    tumor_types = annotations['ENSG00000087460']['Tumor Type']

    assert tumor_types.include?('Adrenocortical')
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/matador'
5
+
6
# Checks the Matador protein to drug table.
class TestMatador < Test::Unit::TestCase
  # The first 'Chemical' of ENSP00000343023 must be 'procainamide'.
  def test_matador
    protein_drug = TSV.new(Matador.protein_drug)
    chemicals = protein_drug['ENSP00000343023']['Chemical']

    assert_equal 'procainamide', chemicals.first
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/pharmagkb'
5
+
6
# Checks the PharmaGKB variants table.
class TestPhGKB < Test::Unit::TestCase
  # The 'Associated Gene Name' of variant rs25487 must include 'XRCC1'.
  def test_phgkb
    variants = TSV.new(PharmaGKB.variants)
    gene_names = variants['rs25487']['Associated Gene Name']

    assert gene_names.include?('XRCC1')
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/stitch'
5
+
6
# Checks the STITCH chemicals table.
class TestSTITCH < Test::Unit::TestCase
  # The chemicals table must have at least one key.
  def test_stitch
    chemical_ids = TSV.new(STITCH.chemicals).keys

    assert chemical_ids.any?
  end
end
11
+
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
# Helper shared by every test case.
class Test::Unit::TestCase
  # Returns the path of a fixture inside the 'data' directory that sits next
  # to this helper file.
  #
  # file - name of the fixture file.
  #
  # NOTE(review): the name starts with "test_", which test runners may treat
  # as a test method - confirm this helper is not picked up as one.
  def test_datafile(file)
    data_dir = File.join(File.dirname(__FILE__), 'data')
    File.join(data_dir, file)
  end
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-phgx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-10 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Pharmaco-genomics related data sources
23
+ email: miguel.vazquez@fdi.ucm.es
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ files:
31
+ - LICENSE
32
+ - lib/phgx.rb
33
+ - lib/rbbt/sources/cancer.rb
34
+ - lib/rbbt/sources/kegg.rb
35
+ - lib/rbbt/sources/matador.rb
36
+ - lib/rbbt/sources/nci.rb
37
+ - lib/rbbt/sources/pharmagkb.rb
38
+ - lib/rbbt/sources/stitch.rb
39
+ - lib/rbbt/sources/string.rb
40
+ - share/install/KEGG/Rakefile
41
+ - share/install/Matador/Rakefile
42
+ - share/install/NCI/Rakefile
43
+ - share/install/PharmaGKB/Rakefile
44
+ - share/install/STITCH/Rakefile
45
+ - share/install/STRING/Rakefile
46
+ - share/install/lib/rake_helper.rb
47
+ - test/rbbt/sources/test_cancer.rb
48
+ - test/rbbt/sources/test_matador.rb
49
+ - test/rbbt/sources/test_pharmagkb.rb
50
+ - test/rbbt/sources/test_stitch.rb
51
+ - test/test_helper.rb
52
+ has_rdoc: true
53
+ homepage: http://github.com/mikisvaz/rbbt-phgx
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.7
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
86
+ test_files:
87
+ - test/rbbt/sources/test_cancer.rb
88
+ - test/rbbt/sources/test_matador.rb
89
+ - test/rbbt/sources/test_pharmagkb.rb
90
+ - test/rbbt/sources/test_stitch.rb
91
+ - test/test_helper.rb