rbbt-phgx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/lib/phgx.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/util/pkg_config'
3
+ require 'rbbt/util/pkg_data'
4
+ require 'rbbt/util/pkg_software'
5
+ require 'rbbt/util/open'
6
+ require 'rbbt/util/tmpfile'
7
+ require 'rbbt/util/filecache'
8
+
9
# Root namespace of the rbbt-phgx package.
#
# Extends itself with the PKGConfig, PKGData and PKGSoftware mixins and then
# calls +load_cfg+ with the single key +datadir+ and a default configuration
# string that points at <tt>$HOME/phgx/data</tt>.
# NOTE(review): what load_cfg does with the default string is defined in
# rbbt-util, not here -- confirm before relying on it.
module PhGx
  extend PKGConfig
  extend PKGData
  extend PKGSoftware

  default_datadir = File.join(ENV['HOME'], 'phgx', 'data')
  self.load_cfg(['datadir'], "datadir: #{default_datadir}\n")
end
16
+
@@ -0,0 +1,12 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Data-source module for the cancer annotation files bundled with the package.
#
# Registers two data files on PhGx (+anais_annotations+ and
# +anais_interactions+), points PKG at PhGx and extends DataModule.
# NOTE(review): the accessors used by the tests (e.g. Cancer.anais_annotations)
# presumably come from add_datafiles / DataModule in rbbt-util -- confirm.
module Cancer
  PhGx.add_datafiles :anais_annotations  => ['Cancer', 'Cancer/anais-annotations.txt'],
                     :anais_interactions => ['Cancer', 'Cancer/anais-interactions.txt']

  PKG = PhGx
  extend DataModule
end

# When executed directly, act on this module. The original called NCI.all,
# a copy/paste slip: NCI is not even required by this file.
if __FILE__ == $0 then Cancer.all end
@@ -0,0 +1,9 @@
1
+ require 'rbbt/util/data_module'
2
+ require 'phgx'
3
+
4
# Data-source module for KEGG. It only binds PKG to PhGx and extends
# DataModule; any further behaviour comes from DataModule (rbbt-util).
module KEGG
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls KEGG.all.
KEGG.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'rbbt/util/data_module'
2
+ require 'phgx'
3
+
4
# Data-source module for Matador. It only binds PKG to PhGx and extends
# DataModule; any further behaviour comes from DataModule (rbbt-util).
module Matador
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls Matador.all.
Matador.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Data-source module for the NCI Cancer Index. It only binds PKG to PhGx and
# extends DataModule; any further behaviour comes from DataModule (rbbt-util).
module NCI
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls NCI.all.
NCI.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Data-source module for PharmGKB. It only binds PKG to PhGx and extends
# DataModule; any further behaviour comes from DataModule (rbbt-util).
module PharmaGKB
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls PharmaGKB.all.
PharmaGKB.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Data-source module for STITCH. It only binds PKG to PhGx and extends
# DataModule; any further behaviour comes from DataModule (rbbt-util).
module STITCH
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls STITCH.all.
STITCH.all if __FILE__ == $0
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+ require 'rbbt/util/data_module'
3
+
4
# Data-source module for STRING. It only binds PKG to PhGx and extends
# DataModule; any further behaviour comes from DataModule (rbbt-util).
module STRING
  PKG = PhGx
  extend DataModule
end

# Running this file as a script calls STRING.all.
STRING.all if __FILE__ == $0
@@ -0,0 +1,109 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw KEGG downloads: local file name under source/ => remote URL.
kegg_sources = {
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
}

define_source_tasks kegg_sources
7
+
8
+
9
# Builds the 'genes' table (Ensembl Gene ID <-> KEGG Gene ID) from the KEGG
# H.sapiens flat file.
#
# Each "ENTRY <digits>" line opens a new gene; any later line containing
# "Ensembl: ENSG..." is attributed to the most recent entry. When an entry
# lists several Ensembl ids, the last one wins (hash assignment).
file :genes => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # each_line instead of String#each: String#each no longer exists in Ruby 1.9+.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    # Ignore Ensembl cross-references seen before any ENTRY line; storing them
    # under a nil key would make the "hsa:" + entry concatenation below fail.
    if line =~ /Ensembl: (ENSG\d+)/ && !entry.nil?
      pairs[entry] = $1
    end
  end

  header = ['#Ensembl Gene ID', 'KEGG Gene ID'] * "\t"
  rows   = pairs.collect{|kegg_id, ens| [ens, "hsa:" + kegg_id] * "\t"}
  Open.write(t.name, header + "\n" + rows * "\n")
end
25
+
26
# Builds the 'gene_drug' table (KEGG Gene ID -> KEGG Drug IDs) from the KEGG
# drug flat file.
#
# The file is cut into sections, each starting at a line whose first character
# is a capital letter and running up to the next such line (or end of input).
# Only sections that start with ENTRY or mention TARGET are examined: ENTRY
# sets the current drug, and the first "[HSA:...]" list found after TARGET on
# one line yields the gene ids for that drug.
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  # The section terminator must be a lookahead. The original `(?:^[A-Z])`
  # consumed the first letter of the following section, so that section no
  # longer started at a line beginning and every other section was skipped.
  # The `s` flag was dropped: in Ruby it selects an encoding, not "dot matches
  # newline" (that is what `m` does).
  sections = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  sections.select{|section| section =~ /^ENTRY|TARGET/}.each do |section|
    # Collapse the first whitespace run only, as the original did.
    section = section.sub(/\s+/, ' ')

    if section =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if section =~ /TARGET.*?\[HSA:(.*?)\]/
      $1.split(/\s/).each do |gene|
        (pairs[gene] ||= []) << drug
      end
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows   = pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|"] * "\t"}
  Open.write(t.name, header + "\n" + rows * "\n")
end
47
+
48
# Builds the 'drugs' table (KEGG Drug ID, names, PubChem ids) from the KEGG
# drug flat file.
#
# Sections are cut the same way as in the gene_drug task. ENTRY sets the
# current drug; the first line of a NAME section is split on ';' into names;
# the first line of a DBLINKS section is searched for "PubChem: <digits>".
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  # Lookahead terminator: the original `(?:^[A-Z])` consumed the first letter
  # of the next section, so alternate sections (typically NAME, right after
  # ENTRY) were never seen. `s` was dropped because in Ruby it is an encoding
  # flag; `m` already lets `.` cross newlines.
  sections = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z]|\z)/m)

  sections.select{|section| section =~ /^ENTRY|NAME|DBLINKS/}.each do |section|
    # Collapse the first whitespace run only, as the original did.
    section = section.sub(/\s+/, ' ')

    if section =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if section =~ /^NAME(.*)/
      $1.split(/;/).each do |name|
        info[drug] ||= [[], []]
        info[drug][0] << name.chomp.strip
      end
    end

    if section =~ /^DBLINKS(.*)/
      pubchem = $1[/PubChem: (\d*)/, 1]
      next unless pubchem
      info[drug] ||= [[], []]
      info[drug][1] << pubchem.chomp.strip
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  # Block parameters are named differently from the outer `info` so they cannot
  # clobber it on Ruby 1.8, where block parameters share the enclosing scope.
  rows = info.collect{|id, fields| [id, fields.collect{|v| v * "|"}].flatten * "\t"}
  Open.write(t.name, header + "\n" + rows * "\n")
end
77
+
78
# Builds the 'pathways' table (KEGG Pathway ID, Name, Description) from the
# KEGG pathway flat file.
#
# The file is read line by line: an ENTRY line selects the current pathway id,
# and NAME / DESCRIPTION lines store their text for that id. Only pathways
# that have a NAME are written; a missing description is written as empty.
file :pathways => 'source/pathways' do |t|
  names, descs = {}, {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    current        = $1.strip if line =~ /ENTRY\s+(\w+)/
    names[current] = $1.strip if line =~ /NAME (.*)/
    descs[current] = $1.strip if line =~ /DESCRIPTION (.*)/
  end

  header = ['#KEGG Pathway ID', 'Name', 'Description'] * "\t"
  rows   = names.keys.collect{|id| [id, names[id], descs[id]] * "\t"}
  Open.write(t.name, header + "\n" + rows * "\n")
end
98
+
99
# Builds 'gene_pathway' from hsa_gene_map.tab. Every row is rendered by the
# data block as "hsa:<gene>" followed by the gene's pathway names, each
# prefixed with "hsa" and joined with "|".
process_tsv :gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ' do
  headers ['KEGG Gene ID', 'KEGG Pathways']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect{|name| "hsa" + name}
    "hsa:#{ gene }\t#{pathway_ids * "|"}"
  end
end

# Hand-written file tasks that the default task should build as well.
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
108
+
109
+
@@ -0,0 +1,15 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw Matador download: local file name under source/ => remote URL.
define_source_tasks "matador" => "http://matador.embl.de/media/download/matador.tsv.gz"


# Builds 'protein_drug' from the Matador dump: column 3 is the key, followed by
# the listed extra columns. The :fix lambda removes the first "9606." from
# each line before parsing.
# NOTE(review): "9606." is presumably the human taxon prefix on protein ids.
process_tsv :protein_drug, 'matador',
  :native => 3,
  # The dot is escaped so only a literal "9606." is removed, matching the
  # STITCH and STRING Rakefiles; the original /9606./ matched "9606" plus any
  # character.
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :extra => [1,0,7,8,9,10,11,12],
  :header_hash => "",
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Matador ID', 'Score', 'Annotation', 'Mesh_Score', 'Mesh_Annotation', 'Matador_Score', 'Matador_Annotation']
end
15
+
@@ -0,0 +1,137 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw NCI Cancer Index downloads: local file name under source/ => remote URL.
nci_sources = {
  "cancer_gene" => "https://gforge.nci.nih.gov/frs/download.php/6819/NCI_CancerIndex_allphases_disease.zip",
  "gene_drug"   => "https://gforge.nci.nih.gov/frs/download.php/6821/NCI_CancerIndex_allphases_compound.zip"
}

define_source_tasks nci_sources
5
+
6
+
7
+
8
# Renders one output row for a gene.
#
# The four parallel lists are zipped into (term, concept, statement, pmid)
# tuples and grouped by term: per term the last concept seen is kept, and all
# statements and PubMed ids are joined with ";;". The per-term values are then
# joined with "|" and the columns with tabs.
def nci_gene_row(entry, terms, concepts, statements, pmids)
  by_term = {}
  TSV.zip_fields([terms, concepts, statements, pmids]).each do |term, concept, statement, pmid|
    by_term[term] ||= {:statements => [], :pmids => []}
    by_term[term][:concept] = concept
    by_term[term][:statements] << statement
    by_term[term][:pmids] << pmid
  end

  names   = by_term.keys
  records = by_term.values_at(*names)

  [entry,
   names * "|",
   records.collect{|r| r[:concept]} * "|",
   records.collect{|r| r[:statements] * ";;"} * "|",
   records.collect{|r| r[:pmids] * ";;"} * "|"] * "\t"
end

# Scans an NCI Cancer Index XML dump line by line and returns a hash
# gene symbol => output row.
#
# A <HUGOGeneSymbol> line opens a new gene. Lines carrying +term_tag+,
# +concept_tag+, <Statement> and <PubMedID> elements are accumulated for the
# current gene.
def nci_gene_associations(path, term_tag, concept_tag)
  term_re    = /<#{term_tag}>(.*)<\/#{term_tag}>/
  concept_re = /<#{concept_tag}>(.*)<\/#{concept_tag}>/

  rows  = {}
  entry = nil
  terms, concepts, statements, pmids = [], [], [], []

  Open.read(path).split(/\n/).each do |line|
    if line =~ /<HUGOGeneSymbol>(.*)<\/HUGOGeneSymbol>/
      symbol = $1
      rows[entry] = nci_gene_row(entry, terms, concepts, statements, pmids) unless entry.nil?
      entry = symbol
      terms, concepts, statements, pmids = [], [], [], []
    end

    # The original wrote `list << $1 || ""`, which parses as
    # `(list << $1) || ""`; the fallback never applied and is dropped.
    terms      << $1 if line =~ term_re
    concepts   << $1 if line =~ concept_re
    statements << $1 if line =~ /<Statement>(.*)<\/Statement>/
    pmids      << $1 if line =~ /<PubMedID>(.*)<\/PubMedID>/
  end

  # Flush the final gene: the original only stored a gene when the NEXT
  # <HUGOGeneSymbol> appeared, so the last gene was silently lost.
  rows[entry] = nci_gene_row(entry, terms, concepts, statements, pmids) unless entry.nil?
  rows
end

# Builds 'gene_drug': gene symbol -> drug terms, drug concept codes,
# statements and PubMed ids.
file :gene_drug => 'source/gene_drug' do |t|
  rows = nci_gene_associations(t.prerequisites.first, 'DrugTerm', 'NCIDrugConceptCode')

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['Associated Gene Name', 'Drugs', 'Drug Concepts', 'Statements', 'PMIDS'] * "\t"
    rows.each_value{|row| f.puts row}
  end
end

# Builds 'gene_cancer': gene symbol -> disease terms, disease concept codes,
# statements and PubMed ids.
file :gene_cancer => 'source/cancer_gene' do |t|
  rows = nci_gene_associations(t.prerequisites.first, 'MatchedDiseaseTerm', 'NCIDiseaseConceptCode')

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['Associated Gene Name', 'Diseases', 'Disease Concepts', 'Statements', 'PMIDS'] * "\t"
    rows.each_value{|row| f.puts row}
  end
end
135
+
136
+ add_to_defaults [:gene_cancer, :gene_drug]
137
+
@@ -0,0 +1,149 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw PharmGKB downloads: local file name under source/ => remote URL.
pharmgkb_sources = {
  "genes"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
}
define_source_tasks pharmgkb_sources


# Line clean-up shared by the three tables below: turns `","` into `|`, drops
# every remaining double quote, and removes a comma that sits directly before
# a tab or the end of the line.
unquote_lists = proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')}

process_tsv :diseases, 'diseases', :header_hash => "", :fix => unquote_lists do
  headers ['PhGKB Disease ID']
end

process_tsv :genes, 'genes', :header_hash => "", :fix => unquote_lists do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

process_tsv :drugs, 'drugs',
  :header_hash => "",
  :extra => ['Name', 'DrugBank Id'],
  :fix => unquote_lists do
  headers ['PhGKB Drug ID', 'Name', 'DrugBank ID']
end
29
+
30
+
31
# Gene-drug rows of the relationships file: only lines mentioning both
# "Gene:" and "Drug:" are kept, keyed by Entity1_id.
#
# In all three tables below the :fix proc strips the entity-type prefixes.
# The original pattern was /Gene:|Drug:|Disease/ (no colon on the last
# alternative), which left ":" in front of disease ids and deleted the word
# "Disease" anywhere in a line.
# NOTE(review): assumes ids look like "Disease:PA..." as the :select filter on
# /Disease:/ implies -- confirm against a current PharmGKB dump.
process_tsv :gene_drug, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Drug:/},
  :native => 'Entity1_id',
  :extra => ['Entity2_id','Relationship'],
  :header_hash => "",
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'Drug']
end

# Gene-disease rows of the relationships file: only lines mentioning both
# "Gene:" and "Disease:" are kept; column 1 is the key and column 3 the value.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /Gene:/ && l =~ /Disease:/},
  :native => 1,
  :extra => 3,
  :header_hash => "",
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end

# Variant annotations keyed by column 1, with the listed extra columns.
process_tsv :variants, 'variants',
  :native => 1,
  :extra => [3,7,8,9,10,4,5],
  :header_hash => "",
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Evidence']
end
62
+
63
# Builds the 'pathways' table (PhGKB Pathway ID, Name, Source). Only lines of
# the form "PA<digits>: <name> - (<source>)" contribute a row; every other
# line is skipped.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Name','Source'] * "\t"

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      if line =~ /(PA\d+): (.*) - \((.*)\)/
        out.puts [$1,$2,$3] * "\t"
      end
    end
  end
end
72
+
73
# Parses the PharmGKB pathways dump and returns a hash
# pathway id => {:name => ..., :members => [...]} holding the names of all
# member rows whose first tab-separated column equals +type+.
#
# A line matching /(P.*):(.*)/ opens a new pathway (id before the colon, name
# after it, unstripped). Any other line is split on tabs into type, code and
# name.
def phgkb_pathway_members(path, type)
  pathways = {}
  current = nil

  Open.read(path).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      current = $1
      pathways[current] = {:name => $2, :members => []}
    else
      kind, _code, name = line.split(/\t/)
      next unless kind == type
      # A member row before any pathway header has nothing to attach to; the
      # original crashed here with NoMethodError on nil.
      next if current.nil?
      pathways[current][:members] << name
    end
  end

  pathways
end

# Writes the table for +task+: a "#"-prefixed header row, then one row per
# pathway that has at least one member of +type+ (members joined with "|").
def write_phgkb_pathway_members(task, type, header)
  File.open(task.name, 'w') do |f|
    f.puts "#" + header * "\t"
    phgkb_pathway_members(task.prerequisites.first, type).each do |pathway, info|
      next if info[:members].empty?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:members] * "|"}"
    end
  end
end

file :gene_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t, 'Gene', ['Pathway ID', 'Name', 'Associated Gene Name'])
end

file :drug_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t, 'Drug', %w(ID Name Drugs))
end

file :disease_pathway => 'source/pathways' do |t|
  write_phgkb_pathway_members(t, 'Disease', %w(ID Name Diseases))
end
148
+
149
+ add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
@@ -0,0 +1,30 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw STITCH downloads: local file name under source/ => remote URL.
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"

# Builds 'protein_chemical' from the lines selected by the "9606." grep
# pattern, keyed by column 1, after removing the first "9606." from each line.
process_tsv :protein_chemical, 'protein_chemicals',
  :native => 1,
  # Single quotes keep the backslash: the original "9606\." in double quotes
  # evaluates to "9606.", where the dot matches any character. This is the
  # same form the STRING Rakefile uses.
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
14
+
15
# Builds 'chemicals' from the alias dump, restricted to chemicals that occur
# in 'protein_chemical'.
#
# The :grep option receives the (initially empty) array held in $grep_re. The
# block, which process_tsv calls before it parses the source, first makes sure
# 'protein_chemical' has been built and then fills that same array in place
# via #replace, so the filter is populated by the time the source is read.
# NOTE(review): the headers repeat those of 'protein_chemical'; that looks
# copied rather than descriptive of the alias file -- confirm.
$grep_re = []
process_tsv :chemicals, 'chemicals', :grep => $grep_re, :native => 0 do
  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  known_chemicals = TSV.new('protein_chemical', :native => 1, :other => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace known_chemicals

  headers ['Ensembl Protein ID', 'Chemical', 'Score']
end
30
+
@@ -0,0 +1,8 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Raw STRING download: local file name under source/ => remote URL.
define_source_tasks "protein_protein" => "http://string-db.org:8080/newstring_download/protein.links.v8.3.txt.gz"

# Builds 'protein_protein' from the lines selected by the '9606\.ENSP' grep
# pattern, with every "9606." removed from each line.
process_tsv :protein_protein, 'protein_protein',
  :grep => '9606\.ENSP',
  :fix => lambda{|l| l.gsub(/9606\./,'')} do

  headers ['Ensembl Protein ID', 'Ensembl Protein ID', 'Score']
end
8
+
@@ -0,0 +1,93 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '../../../lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'rbbt/util/tsv'
5
+ require 'rbbt/util/open'
6
+ require 'rbbt/util/log'
7
+
8
# Directory (relative to the Rakefile) that holds the raw downloads.
SOURCE_DIR = 'source'

# Defines one rake file task per entry of +sources+.
#
# sources - Hash of local file name => URL. Each task is named
#           "source/<name>" and, when run, creates the source directory if
#           needed, logs the retrieval and writes what Open.open returns for
#           the URL into the task's file.
def define_source_tasks(sources)
  sources.each do |name, url|
    file File.join(SOURCE_DIR, name) do |t|
      # mkdir_p does nothing when the directory already exists. It replaces
      # the `File.exists?` guard; that method was removed in Ruby 3.2.
      FileUtils.mkdir_p SOURCE_DIR
      Log.log "Retrieving file '#{name}' into '#{t.name}': '#{url}'", Log::LOW
      Open.write(t.name, Open.open(url, :cache => false, :wget_options => {"--no-check-certificate" => true, "--quiet" => false, :pipe => true}))
    end
  end
end
18
+
19
# Header names declared for the table currently being processed (nil when
# none have been declared).
$__headers = nil

# DSL call used inside a process_tsv block: remembers +values+ as the header
# names and returns them.
def headers(values)
  $__headers = values
end
23
+
24
# Row formatter declared for the table currently being processed (nil when
# rows are written unchanged).
$__data = nil

# DSL call used inside a process_tsv block: remembers the given block, which
# process_tsv later calls with (key, values) for every row.
def data(&block)
  $__data = block
end
28
+
29
# Names of every table registered through process_tsv, in registration order.
$__tsv_tasks = []

# Returns the list of tables registered through process_tsv (the live array,
# not a copy).
def tsv_tasks
  $__tsv_tasks
end
33
+
34
# Extra (hand-written) file tasks that the default and clean tasks should
# handle in addition to the process_tsv tables.
$__files = []

# Registers additional task names for the default/clean tasks.
#
# list - an Array of task names (a single name is accepted too).
#
# The names are appended. The original replaced the whole list on every call,
# so a second call silently dropped what the first had registered, despite
# the method's name. Returns +list+, as before.
def add_to_defaults(list)
  $__files.concat(Array(list))
  list
end
38
+
39
# Registers a rake file task that turns a raw source file into a
# tab-separated table.
#
# file    - name of the table/task to create (also recorded in $__tsv_tasks).
# source  - name of the raw file under SOURCE_DIR the table is built from.
# options - passed unchanged to TSV.new when the source is parsed.
# block   - optional; runs when the task executes, before parsing, and may
#           call +headers+ and/or +data+ to customise the output.
#
# When the parsed TSV reports field names, they are combined positionally
# with the declared headers (a declared header wins where present). Each row
# is written either as the key followed by its values, or as whatever the
# +data+ block returns. Array values inside a row are joined with "|".
def process_tsv(file, source, options = {}, &block)

  $__tsv_tasks << file

  file file => File.join(SOURCE_DIR, source) do |t|
    # Start every table from a clean slate so headers or a data formatter
    # declared for a previously built table cannot leak into this one.
    $__headers = nil
    $__data = nil
    # The block is optional; the original crashed on nil.call without one.
    block.call if block

    d = TSV.new(t.prerequisites.first, options)

    if d.fields != nil
      data_fields = d.fields.dup.unshift d.key_field
      if $__headers.nil?
        $__headers = data_fields
      else
        # Pair each parsed field name with the declared header at the same
        # position and keep the declared one when it exists.
        $__headers = data_fields.zip($__headers).collect{|pair| pair.compact.last}
      end
    end

    File.open(t.name.to_s, 'w') do |f|
      f.puts "#" + $__headers * "\t" if $__headers != nil
      d.each do |key, values|
        line = $__data.nil? ? values.unshift(key) : $__data.call(key, values)

        if Array === line
          key = line.shift
          fields = line.collect{|elem| Array === elem ? elem * "|" : elem }
          fields.unshift key
          f.puts fields * "\t"
        else
          f.puts line
        end
      end
    end
  end
end
84
+
85
# Builds every table registered through process_tsv plus the extra tasks
# registered with add_to_defaults.
task :default do |t|
  ($__tsv_tasks + $__files).each do |file| Rake::Task[file].invoke end
end

task :all => :default

# Deletes the generated tables. Raw downloads under source/ are not in either
# list and are therefore left alone.
task :clean do
  ($__tsv_tasks + $__files).each do |file|
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    FileUtils.rm file.to_s if File.exist?(file.to_s)
  end
end
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/cancer'
5
+
6
# Checks the bundled cancer annotation table.
class TestCancer < Test::Unit::TestCase
  # ENSG00000087460 must list 'Adrenocortical' among its tumor types.
  def test_anais_annotations
    tumor_types = TSV.new(Cancer.anais_annotations)['ENSG00000087460']['Tumor Type']
    assert tumor_types.include?('Adrenocortical')
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/matador'
5
+
6
# Checks the Matador protein-drug table.
class TestMatador < Test::Unit::TestCase
  # The first chemical listed for ENSP00000343023 must be procainamide.
  def test_matador
    chemicals = TSV.new(Matador.protein_drug)['ENSP00000343023']['Chemical']
    assert_equal 'procainamide', chemicals.first
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/pharmagkb'
5
+
6
# Checks the PharmGKB variant annotation table.
class TestPhGKB < Test::Unit::TestCase
  # Variant rs25487 must be associated with the gene XRCC1.
  def test_phgkb
    gene_names = TSV.new(PharmaGKB.variants)['rs25487']['Associated Gene Name']
    assert gene_names.include?('XRCC1')
  end
end
11
+
@@ -0,0 +1,11 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'test/unit'
3
+ require 'rbbt/util/tmpfile'
4
+ require 'rbbt/sources/stitch'
5
+
6
# Checks the STITCH chemicals table.
class TestSTITCH < Test::Unit::TestCase
  # The table must contain at least one (truthy) key.
  def test_stitch
    chemical_ids = TSV.new(STITCH.chemicals).keys
    assert chemical_ids.any?
  end
end
11
+
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
# Shared helper for all test cases.
class Test::Unit::TestCase
  # Kept private on purpose: the name starts with "test", and minitest-backed
  # test/unit implementations collect every PUBLIC method matching /^test/ as
  # a test case and call it without arguments, which would raise
  # ArgumentError in each test class. Private methods are not collected, and
  # test methods can still call the helper with an implicit receiver.
  private

  # Returns the path of +file+ inside the data/ directory that sits next to
  # this helper.
  def test_datafile(file)
    File.join(File.dirname(__FILE__), 'data', file)
  end
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-phgx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-10 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Pharmaco-genomics related data sources
23
+ email: miguel.vazquez@fdi.ucm.es
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - LICENSE
30
+ files:
31
+ - LICENSE
32
+ - lib/phgx.rb
33
+ - lib/rbbt/sources/cancer.rb
34
+ - lib/rbbt/sources/kegg.rb
35
+ - lib/rbbt/sources/matador.rb
36
+ - lib/rbbt/sources/nci.rb
37
+ - lib/rbbt/sources/pharmagkb.rb
38
+ - lib/rbbt/sources/stitch.rb
39
+ - lib/rbbt/sources/string.rb
40
+ - share/install/KEGG/Rakefile
41
+ - share/install/Matador/Rakefile
42
+ - share/install/NCI/Rakefile
43
+ - share/install/PharmaGKB/Rakefile
44
+ - share/install/STITCH/Rakefile
45
+ - share/install/STRING/Rakefile
46
+ - share/install/lib/rake_helper.rb
47
+ - test/rbbt/sources/test_cancer.rb
48
+ - test/rbbt/sources/test_matador.rb
49
+ - test/rbbt/sources/test_pharmagkb.rb
50
+ - test/rbbt/sources/test_stitch.rb
51
+ - test/test_helper.rb
52
+ has_rdoc: true
53
+ homepage: http://github.com/mikisvaz/rbbt-phgx
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.7
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
86
+ test_files:
87
+ - test/rbbt/sources/test_cancer.rb
88
+ - test/rbbt/sources/test_matador.rb
89
+ - test/rbbt/sources/test_pharmagkb.rb
90
+ - test/rbbt/sources/test_stitch.rb
91
+ - test/test_helper.rb