rbbt-phgx 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/mutation/mutation_assessor.rb +1 -1
- data/lib/rbbt/sources/kegg.rb +47 -0
- data/lib/rbbt/sources/pina.rb +4 -1
- data/lib/rbbt/sources/stitch.rb +4 -1
- data/lib/rbbt/sources/string.rb +5 -1
- data/share/install/PharmaGKB/Rakefile +73 -14
- data/share/install/STITCH/Rakefile +1 -1
- data/share/install/lib/rake_helper.rb +0 -2
- metadata +6 -6
data/lib/rbbt/sources/kegg.rb
CHANGED
@@ -7,4 +7,51 @@ module KEGG
|
|
7
7
|
self.subdir = "share/kegg"
|
8
8
|
|
9
9
|
KEGG.claim KEGG.root.find, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
10
|
+
|
11
|
+
def self.names
|
12
|
+
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.descriptions
|
16
|
+
@@descriptions ||= KEGG.pathways.tsv :fields => ["Pathway Description"], :persist => true, :type => :single
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
def self.index2ens
|
21
|
+
@@index2ens ||= KEGG.identifiers.index :persist => true
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.index2kegg
|
25
|
+
@@index2kegg ||= KEGG.identifiers.index :target => "KEGG Gene ID", :persist => true
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.id2name(id)
|
29
|
+
names[id]
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.description(id)
|
33
|
+
descriptions[id]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
module Gene
|
38
|
+
|
39
|
+
def to_kegg
|
40
|
+
if Array === self
|
41
|
+
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism)
|
42
|
+
else
|
43
|
+
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def from_kegg
|
48
|
+
if Array === self
|
49
|
+
Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism)
|
50
|
+
else
|
51
|
+
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
|
10
57
|
end
|
data/lib/rbbt/sources/pina.rb
CHANGED
data/lib/rbbt/sources/stitch.rb
CHANGED
data/lib/rbbt/sources/string.rb
CHANGED
@@ -22,26 +22,64 @@ end
|
|
22
22
|
|
23
23
|
process_tsv :drugs, 'drugs',
|
24
24
|
:header_hash => "",
|
25
|
-
:fields => ['Name', '
|
25
|
+
:fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
|
26
26
|
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
27
|
-
headers ['PhGKB Drug ID', 'Drug Name', '
|
27
|
+
headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
|
28
|
+
end
|
29
|
+
|
30
|
+
process_tsv :relationships, 'relationships',
|
31
|
+
:header_hash => "",
|
32
|
+
:merge => true,
|
33
|
+
:fix => proc{|l|
|
34
|
+
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
35
|
+
parts = l.split("\t")
|
36
|
+
rels = parts.pop
|
37
|
+
parts = [parts.values_at(0, 2) * ":"]
|
38
|
+
pmids = []
|
39
|
+
pathways = []
|
40
|
+
rsids = []
|
41
|
+
rels.split(',').each do |r|
|
42
|
+
case
|
43
|
+
when r =~ /PMID:(.*)/
|
44
|
+
pmids << $1
|
45
|
+
when r =~ /Pathway:(.*)/
|
46
|
+
pathways << $1
|
47
|
+
when r =~ /RSID:(.*)/
|
48
|
+
rsids << $1
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
parts << pmids * "|"
|
53
|
+
parts << pathways * "|"
|
54
|
+
parts << rsids * "|"
|
55
|
+
|
56
|
+
parts * "\t"
|
57
|
+
},
|
58
|
+
:keep_empty => true do
|
59
|
+
|
60
|
+
headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
|
28
61
|
end
|
29
62
|
|
30
63
|
|
31
64
|
process_tsv :gene_drug, 'relationships',
|
32
|
-
:select => proc{|l| l =~
|
33
|
-
:key_field => 'Entity1_id',
|
34
|
-
:fields => ['Entity2_id','Relationship'],
|
65
|
+
:select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
|
35
66
|
:header_hash => "",
|
36
67
|
:merge => true,
|
37
|
-
:fix => proc{|l|
|
68
|
+
:fix => proc{|l|
|
69
|
+
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
70
|
+
parts = l.split("\t")
|
71
|
+
rels = parts.pop
|
72
|
+
parts = parts.values_at 0, 2
|
73
|
+
|
74
|
+
parts * "\t"
|
75
|
+
},
|
38
76
|
:keep_empty => true do
|
39
77
|
|
40
|
-
headers ['PhGKB Gene ID', 'Drug
|
78
|
+
headers ['PhGKB Gene ID', 'PhGKB Drug ID']
|
41
79
|
end
|
42
80
|
|
43
81
|
process_tsv :gene_disease, 'relationships',
|
44
|
-
:select => proc{|l| l =~
|
82
|
+
:select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
|
45
83
|
:key_field => 1,
|
46
84
|
:fields => 3,
|
47
85
|
:merge => true,
|
@@ -67,8 +105,29 @@ file :pathways => 'source/pathways' do |t|
|
|
67
105
|
File.open(t.name, 'w') do |f|
|
68
106
|
f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
|
69
107
|
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
70
|
-
|
71
|
-
|
108
|
+
case
|
109
|
+
when line =~ /(PA\d+): (.*) - \((.*)\)/
|
110
|
+
f.puts [$1,$2,$3] * "\t"
|
111
|
+
when line =~ /(PA\d+): (.*)/
|
112
|
+
f.puts [$1,$2,""] * "\t"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
file :gene_pathway => 'source/pathways' do |t|
|
119
|
+
pathways = {}
|
120
|
+
last_pathway = nil
|
121
|
+
|
122
|
+
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
123
|
+
if line =~ /(P.*):(.*)/
|
124
|
+
last_pathway = $1
|
125
|
+
pathways[last_pathway] = {:name => $2}
|
126
|
+
else
|
127
|
+
type, code, name = line.split(/\t/)
|
128
|
+
next unless type =='Gene'
|
129
|
+
pathways[last_pathway][:genes] ||= []
|
130
|
+
pathways[last_pathway][:genes] << name
|
72
131
|
end
|
73
132
|
end
|
74
133
|
end
|
@@ -98,7 +157,7 @@ file :gene_pathway => 'source/pathways' do |t|
|
|
98
157
|
end
|
99
158
|
end
|
100
159
|
|
101
|
-
file :
|
160
|
+
file :pathway_drugs => 'source/pathways' do |t|
|
102
161
|
pathways = {}
|
103
162
|
last_pathway = nil
|
104
163
|
|
@@ -110,15 +169,15 @@ file :drug_pathway => 'source/pathways' do |t|
|
|
110
169
|
type, code, name = line.split(/\t/)
|
111
170
|
next unless type =='Drug'
|
112
171
|
pathways[last_pathway][:drugs] ||= []
|
113
|
-
pathways[last_pathway][:drugs] <<
|
172
|
+
pathways[last_pathway][:drugs] << code
|
114
173
|
end
|
115
174
|
end
|
116
175
|
|
117
176
|
File.open(t.name, 'w') do |f|
|
118
|
-
f.puts "#" +
|
177
|
+
f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
|
119
178
|
pathways.each do |pathway, info|
|
120
179
|
next if info[:drugs].nil?
|
121
|
-
f.puts "#{ pathway }\t#{info[:
|
180
|
+
f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
|
122
181
|
end
|
123
182
|
end
|
124
183
|
end
|
@@ -20,7 +20,7 @@ process_tsv :chemicals, 'chemicals',
|
|
20
20
|
Rake::Task['protein_chemical'].invoke
|
21
21
|
|
22
22
|
Log.debug "Getting chemicals"
|
23
|
-
chemicals = TSV.
|
23
|
+
chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
|
24
24
|
Log.debug "Getting chemicals [done]"
|
25
25
|
|
26
26
|
$grep_re.replace chemicals
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 17
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 1
|
10
|
+
version: 0.3.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-10-03 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -118,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
118
|
requirements: []
|
119
119
|
|
120
120
|
rubyforge_project:
|
121
|
-
rubygems_version: 1.
|
121
|
+
rubygems_version: 1.6.2
|
122
122
|
signing_key:
|
123
123
|
specification_version: 3
|
124
124
|
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|