rbbt-sources 2.1.7 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/kegg.rb +127 -0
- data/lib/rbbt/sources/matador.rb +9 -0
- data/lib/rbbt/sources/pharmagkb.rb +9 -0
- data/lib/rbbt/sources/pina.rb +35 -0
- data/lib/rbbt/sources/stitch.rb +9 -0
- data/lib/rbbt/sources/string.rb +27 -0
- data/share/install/KEGG/Rakefile +114 -0
- data/share/install/PharmaGKB/Rakefile +211 -0
- data/share/install/Pina/Rakefile +16 -0
- data/share/install/STITCH/Rakefile +30 -0
- data/share/install/STRING/Rakefile +8 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00cd4a9602b9ea2637a620b30cd3d48a6d63a9fe
|
4
|
+
data.tar.gz: c282f8c86de5148343e5a83ea524cdc09435b9fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 701a67455ca18d9c705e2c409628cd5463f7449d2ee40ba4d26cce6f203018db21b9c6ee6f233cf8d80e44e28d3ffcfa08e474678b538b8db7cb80c44e5eac5a
|
7
|
+
data.tar.gz: 4bbcf6f222c01c5f3314617ed7c2458b3cebb9d8b3293ac631305ea2c610c935792fe0e5d6a7402f041aab4304e5586a7989f1e2a097b5dc620f0cb7a208250c
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Resource for KEGG data. Its files are claimed from the KEGG install
# Rakefile, and the class methods below expose memoized tables built from
# those files.
module KEGG
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/kegg"

  KEGG.claim(KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib))

  # Memoized table of the "Pathway Name" field of the pathways file.
  def self.names
    @@names ||= KEGG.pathways.tsv(
      :fields  => ["Pathway Name"],
      :persist => true,
      :type    => :single,
      :unnamed => true
    )
  end

  # Memoized table of the "Pathway Description" field of the pathways file.
  def self.descriptions
    @@descriptions ||= KEGG.pathways.tsv(
      :fields  => ["Pathway Description"],
      :persist => true,
      :type    => :single,
      :unnamed => true
    )
  end

  # Memoized flat table keyed by "KEGG Pathway ID" listing "KEGG Gene ID".
  def self.index2genes
    @@index2genes ||= KEGG.gene_pathway.tsv(
      :key_field => "KEGG Pathway ID",
      :fields    => ["KEGG Gene ID"],
      :persist   => true,
      :type      => :flat,
      :merge     => true
    )
  end

  # Memoized index over the identifiers file using its default target.
  def self.index2ens
    @@index2ens ||= KEGG.identifiers.index(:persist => true)
  end

  # Memoized index over the identifiers file targeting "KEGG Gene ID".
  def self.index2kegg
    @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
  end

  # Name stored for the pathway +id+ in the names table.
  def self.id2name(id)
    names[id]
  end

  # Ids of the pathways whose name starts with +name+, ignoring case.
  # Any StandardError raised during the search yields an empty list.
  def self.name2id(name)
    hits = names.select { |_id, label| label.downcase.start_with?(name.downcase) }
    hits.collect { |id, _label| id }
  rescue StandardError
    []
  end

  # Description stored for the pathway +id+ in the descriptions table.
  def self.description(id)
    descriptions[id]
  end
end
|
46
|
+
|
47
|
+
if defined? Entity
|
48
|
+
|
49
|
+
# Entity for KEGG pathways, identified by "KEGG Pathway ID" and annotated
# with an organism.
module KeggPathway
  extend Entity
  self.format = "KEGG Pathway ID"

  self.annotation :organism

  # Tells whether +entity+ matches +query+: either both are equal, or
  # +query+ occurs in the pathway name of +entity+.
  #
  # query   - value to look for
  # field   - passed as the :format annotation when setting up the entity
  # options - additional annotations for the setup; may be nil
  # entity  - value being tested; may be nil
  #
  # Returns true or false.
  def self.filter(query, field = nil, options = nil, entity = nil)
    return true if query == entity

    # Without an entity there is no name to search in.
    return false if entity.nil?

    # options defaults to nil, so it cannot be merged into directly.
    annotations = (options || {}).merge(:format => field)
    name = KeggPathway.setup(entity.dup, annotations).name

    # The name property yields nil when KEGG.id2name has no entry.
    return false if name.nil?

    name.index(query) ? true : false
  end

  # Pathway name from KEGG.id2name with any trailing " - Homo..." removed;
  # nil when there is no name.
  property :name => :single2array do
    return nil if self.nil?
    name = KEGG.id2name(self)
    name.sub(/ - Homo.*/,'') unless name.nil?
  end

  # Pathway description as returned by KEGG.description.
  property :description => :single2array do
    KEGG.description(self)
  end

  # Gene lists for the pathways, taken from KEGG.index2genes. The organism
  # (first argument, or the pathway's own) is assigned to every list that
  # responds to :organism.
  property :genes => :array2single do |*args|
    organism = args.first || self.organism
    KEGG.index2genes.values_at(*self).
      each{|gene| gene.organism = organism if gene.respond_to? :organism }
  end
end
|
79
|
+
|
80
|
+
# KEGG extensions for the Gene entity, added only when Gene is already
# defined and is an Entity.
if defined? Gene and Entity === Gene
  module Gene
    self.format = "KEGG Gene ID"

    # Returns the gene(s) in "KEGG Gene ID" format, translating through
    # "Ensembl Gene ID" with KEGG.index2kegg when needed.
    def to_kegg
      return self if format == "KEGG Gene ID"

      index   = KEGG.index2kegg
      ensembl = to("Ensembl Gene ID")
      ids     = Array === self ? index.values_at(*ensembl) : index[ensembl]

      translated = Gene.setup(ids, "KEGG Gene ID", organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    end

    # Returns the gene(s) in "Ensembl Gene ID" format when they are in
    # "KEGG Gene ID" format; otherwise returns self unchanged.
    def from_kegg
      return self unless format == "KEGG Gene ID"

      index = KEGG.index2ens
      ids   = Array === self ? index.values_at(*self) : index[self]

      translated = Gene.setup(ids, "Ensembl Gene ID", organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    end

    # Memoized flat table keyed by "KEGG Gene ID" listing "KEGG Pathway ID".
    def self.gene_kegg_pathway_index
      @@gene_kegg_pathway_index ||= KEGG.gene_pathway.tsv(
        :persist   => true,
        :key_field => "KEGG Gene ID",
        :fields    => ["KEGG Pathway ID"],
        :type      => :flat,
        :merge     => true
      )
    end

    # Translates the genes to +new_format+. KEGG ids are first converted to
    # Ensembl ids; other formats go through the Translation :tsv_translate job.
    property :to => :array2single do |new_format|
      if format == new_format
        self
      elsif format == "KEGG Gene ID"
        ensembl = from_kegg.clean_annotations
        job = Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format)
        converted = Gene.setup(job.exec.chunked_values_at(ensembl), new_format, organism)
        converted.extend AnnotatedArray if AnnotatedArray === self
        converted
      elsif new_format == "KEGG Gene ID"
        to_kegg
      else
        job = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format)
        converted = Gene.setup(job.exec.chunked_values_at(self), new_format, organism)
        converted.extend AnnotatedArray if AnnotatedArray === self
        converted
      end
    end

    # Pathway lists for the genes, looked up by KEGG id and set up as
    # KeggPathway; the result is memoized in @kegg_pathways.
    property :kegg_pathways => :array2single do
      @kegg_pathways ||= begin
        pathway_lists = Gene.gene_kegg_pathway_index.values_at(*self.to_kegg)
        pathway_lists.each do |list|
          list.organism = organism if list.respond_to? :organism
        end
        KeggPathway.setup(pathway_lists, organism)
        pathway_lists
      end
    end
  end
end
|
127
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
|
3
|
+
# Resource for PINA data; its files are claimed from the Pina install Rakefile.
module Pina
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/pina"

  Pina.claim(Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib))
end
|
10
|
+
|
11
|
+
# PINA extensions for the Gene entity, added only when Entity and Gene are
# defined and Gene is an Entity.
if defined? Entity and defined? Gene and Entity === Gene
  require 'rbbt/entity/gene'
  require 'rbbt/entity/interactor'
  require 'rbbt/sources/PSI_MI'

  module Gene
    # For each gene, the unique interactors found in Pina.protein_protein
    # for its UniProt/SwissProt accessions. Every interactor is set up with
    # its methods (PSI_MITerm) and articles (PMID).
    property :pina_interactors => :array2single do
      uniprot_index = Organism.identifiers(organism).tsv(
        :key_field => "Ensembl Gene ID",
        :fields    => ["UniProt/SwissProt Accession"],
        :type      => :flat,
        :persist   => true,
        :unnamed   => true
      )
      interactions = Pina.protein_protein.tsv(
        :persist => true,
        :fields  => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"],
        :type    => :double,
        :merge   => true,
        :unnamed => true
      )

      per_gene = self.ensembl.collect do |gene_id|
        accessions = uniprot_index[gene_id]
        entries = interactions.values_at(*accessions).compact

        partners = entries.collect do |entry|
          Misc.zip_fields(entry).collect do |partner, detection, articles|
            # Multiple values inside a field are separated by ";;".
            Interactor.setup(partner, PSI_MITerm.setup(detection.split(";;")), PMID.setup(articles.split(";;")))
          end
        end

        Gene.setup(partners.flatten.uniq, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
      end

      Gene.setup(per_gene, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
    end
  end
end
|
35
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
|
3
|
+
# Resource for STRING data; its files are claimed from the STRING install
# Rakefile.
module STRING
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/string"

  STRING.claim(STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib))
end
|
10
|
+
|
11
|
+
# STRING extensions for the Gene entity, added only when Entity and Gene are
# defined and Gene is an Entity.
if defined? Entity and defined? Gene and Entity === Gene
  module Gene
    # For each gene, the genes of the proteins that STRING links to any of
    # its proteins with a score above the threshold (first argument,
    # default 800).
    property :string_interactors => :array2single do |*args|
      threshold = args.first || 800
      links = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)

      results = self.ensembl.collect do |gene|
        per_protein = gene.proteins.collect do |protein|
          pairs  = Misc.zip_fields(links[protein] || [[],[]])
          strong = pairs.select { |_partner, score| score.to_i > threshold }
          strong.collect { |partner, _score| partner }
        end

        partners = per_protein.compact.flatten.uniq
        Protein.setup(partners, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
      end

      # Copy the annotations of the first non-nil result onto the outer list.
      template = results.compact.first
      template.annotate results if Annotated === template

      results
    end
  end
end
|
27
|
+
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Source files for the KEGG tasks: local name => download location.
define_source_tasks({
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
})
|
7
|
+
|
8
|
+
|
9
|
+
# Builds the identifiers file: one row per KEGG entry that has an Ensembl
# gene cross-reference, as "Ensembl Gene ID" <tab> "hsa:<entry>".
file :identifiers => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # String has no #each on Ruby >= 1.9; iterate over lines explicitly.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    # Ignore cross-references seen before any ENTRY line; there is no
    # entry to attach them to.
    if entry && line =~ /Ensembl: (ENSG\d+)/
      pairs[entry] = $1
    end
  end

  Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + pairs.collect{|entry, ens| [ens, "hsa:" + entry] * "\t"} * "\n")
end
|
25
|
+
|
26
|
+
# Builds the gene_drug file: for every human gene listed in a drug's TARGET
# field, the KEGG drug ids that target it ("hsa:<gene>" <tab> ids joined by "|").
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  # Split the flat file into fields, each starting with a capital letter at
  # the beginning of a line. The next field's first letter is matched with a
  # lookahead so it is not consumed; consuming it made the scan skip every
  # other field. The /s flag was dropped: in Ruby it selects the Windows-31J
  # encoding rather than dot-all behaviour (that is /m).
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z])/m)

  fields.select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if line =~ /TARGET.*?\[HSA:(.*?)\]/
      genes = $1.split(/\s/)
      genes.each do |gene|
        pairs[gene] ||= []
        pairs[gene] << drug
      end
    end
  end

  Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
end
|
47
|
+
|
48
|
+
# Builds the drugs file: KEGG drug id, its names and its PubChem ids
# (multiple values joined by "|").
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  # Split the flat file into fields, each starting with a capital letter at
  # the beginning of a line. The next field's first letter is matched with a
  # lookahead so it is not consumed; consuming it dropped the field that
  # follows each matched one (NAME comes right after ENTRY). The /s flag was
  # dropped: in Ruby it selects the Windows-31J encoding, not dot-all (/m).
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z])/m)

  fields.select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if line =~ /^NAME(.*)/
      names = $1.split(/;/)
      names.each do |name|
        info[drug] ||= [[],[]]
        info[drug][0] << name.chomp.strip
      end
    end

    if line =~ /^DBLINKS(.*)/
      # nil when the links carry no PubChem entry.
      pubchem = $1[/PubChem: (\d*)/, 1]
      next unless pubchem
      info[drug] ||= [[],[]]
      info[drug][1] << pubchem.chomp.strip
    end
  end

  # The block parameter is named differently from the outer `info` to avoid
  # shadowing it.
  rows = info.collect{|drug_id, values| [drug_id, values.collect{|v| v * "|"} ].flatten * "\t"}
  Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + rows * "\n")
end
|
77
|
+
|
78
|
+
# Builds the pathways file: id, name, description and class of every pathway
# entry that has a NAME line.
file :pathways => 'source/pathways' do |t|
  names = {}
  descs = {}
  klass = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |row|
    # Each pattern is tested independently, as a row may match several.
    current        = $1.strip if row =~ /ENTRY\s+(\w+)/
    names[current] = $1.strip if row =~ /NAME (.*)/
    descs[current] = $1.strip if row =~ /DESCRIPTION (.*)/
    klass[current] = $1.strip if row =~ /CLASS (.*)/
  end

  header = ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'].join("\t")
  rows = names.keys.collect do |id|
    [id, names[id], descs[id], klass[id]].join("\t")
  end

  Open.write(t.name, "#: :type=:list\n" + header + "\n" + rows.join("\n"))
end
|
103
|
+
|
104
|
+
# Builds gene_pathway from hsa_gene_map.tab, prefixing gene ids with "hsa:"
# and pathway ids with "hsa".
process_tsv(:gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ') do
  headers ['KEGG Gene ID', 'KEGG Pathway ID']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect { |name| "hsa" + name }
    "hsa:#{ gene }\t#{pathway_ids * "|"}"
  end
end
|
111
|
+
|
112
|
+
# File tasks to include in the defaults. This Rakefile defines no :genes
# task; the remaining file task is :identifiers, so it is listed instead.
# NOTE(review): confirm against add_to_defaults in rake_helper.
add_to_defaults [:pathways, :drugs, :gene_drug, :identifiers]
|
113
|
+
|
114
|
+
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Source files for the PharmGKB tasks: local name => download location.
define_source_tasks({
  "genes"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
})
|
9
|
+
|
10
|
+
|
11
|
+
# Line clean-up for the diseases file: `","` becomes "|", remaining double
# quotes are removed, and a comma right before a tab or line end is dropped.
diseases_line_fix = proc do |l|
  l.gsub('","', '|').delete('"').gsub(/,(\t|$)/, '\1')
end

process_tsv(:diseases, 'diseases', :header_hash => "", :fix => diseases_line_fix) do
  headers ['PhGKB Disease ID']
end
|
16
|
+
|
17
|
+
# Line clean-up for the genes file: `","` becomes "|", remaining double
# quotes are removed, and a comma right before a tab or line end is dropped.
genes_line_fix = proc do |l|
  l.gsub('","', '|').delete('"').gsub(/,(\t|$)/, '\1')
end

process_tsv(:identifiers, 'genes', :header_hash => "", :fix => genes_line_fix) do
  # NOTE(review): 'Ensembl Gene Id' differs in case from the "Ensembl Gene ID"
  # spelling used in the library files — confirm which one consumers expect.
  headers [
    'PhGKB Gene ID',
    'Entrez Gene ID',
    'Ensembl Gene Id',
    'UniProt/SwissProt Accession',
    'Long Name',
    'Associated Gene Name'
  ]
end
|
22
|
+
|
23
|
+
# Line clean-up for the drugs file: `","` becomes "|", remaining double
# quotes are removed, and a comma right before a tab or line end is dropped.
drugs_line_fix = proc do |l|
  l.gsub('","', '|').delete('"').gsub(/,(\t|$)/, '\1')
end

process_tsv(:drugs, 'drugs',
            :header_hash => "",
            :fields      => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
            :fix         => drugs_line_fix) do
  headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
end
|
29
|
+
|
30
|
+
# Rewrites a relationships line as four tab-separated columns: the first and
# third original columns joined by ":", then the PMIDs, pathways and RSIDs
# found in the last column, each joined by "|". The "Gene:", "Drug:" and
# "Disease:" prefixes are removed from the line in place first.
relationship_line_fix = proc do |l|
  l.gsub!(/Gene:|Drug:|Disease:/,'')

  columns  = l.split("\t")
  evidence = columns.pop
  pair     = columns.values_at(0, 2) * ":"

  pmids, pathways, rsids = [], [], []
  evidence.split(',').each do |item|
    case item
    when /PMID:(.*)/    then pmids << $1
    when /Pathway:(.*)/ then pathways << $1
    when /RSID:(.*)/    then rsids << $1
    end
  end

  [pair, pmids * "|", pathways * "|", rsids * "|"] * "\t"
end

process_tsv(:relationships, 'relationships',
            :header_hash => "",
            :merge       => true,
            :fix         => relationship_line_fix,
            :keep_empty  => true) do
  headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
end
|
62
|
+
|
63
|
+
|
64
|
+
# Keeps only relationship lines that start with "Gene:" and mention "Drug:".
gene_drug_select = proc { |l| l =~ /^Gene:/ && l =~ /Drug:/ }

# Reduces a relationships line to its first and third columns, after removing
# the "Gene:", "Drug:" and "Disease:" prefixes in place.
gene_drug_line_fix = proc do |l|
  l.gsub!(/Gene:|Drug:|Disease:/,'')
  columns = l.split("\t")
  # The last column is discarded before the columns are picked by position.
  columns.pop
  columns.values_at(0, 2) * "\t"
end

process_tsv(:gene_drug, 'relationships',
            :select      => gene_drug_select,
            :header_hash => "",
            :merge       => true,
            :fix         => gene_drug_line_fix,
            :keep_empty  => true) do
  headers ['PhGKB Gene ID', 'PhGKB Drug ID']
end
|
80
|
+
|
81
|
+
# Builds gene_disease from the relationships lines that start with "Gene:"
# and mention "Disease:", using column 1 as key and column 3 as value.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
  :key_field => 1,
  :fields => 3,
  :merge => true,
  :header_hash => "",
  # Remove the complete "Disease:" prefix. The pattern used to be "Disease"
  # without the colon, which left a stray ":" in front of every disease id
  # (the relationships and gene_drug tasks already strip "Disease:").
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end
|
92
|
+
|
93
|
+
# Builds the variants file keyed by column 1, with the listed columns
# reordered into the headers below.
process_tsv :variants, 'variants',
  :key_field => 1,
  :fields => [3,7,8,9,10,4,6,5],
  :header_hash => "",
  :merge => true,
  # Strip only the "Gene:", "Drug:" and "Disease:" prefixes. The pattern used
  # to be "Disease" without the colon, which also deleted the bare word
  # "Disease" wherever it appeared in a column.
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
end
|
103
|
+
|
104
|
+
# Builds the pathways file from the "PA<digits>: name" header lines of the
# source: id, name and, when the name ends in " - (source)", that source.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'].join("\t")

    Open.read(t.prerequisites.first).split(/\n/).each do |row|
      if (m = row.match(/(PA\d+): (.*) - \((.*)\)/))
        out.puts [m[1], m[2], m[3]].join("\t")
      elsif (m = row.match(/(PA\d+): (.*)/))
        # No annotation source on this header; leave the column empty.
        out.puts [m[1], m[2], ""].join("\t")
      end
    end
  end
end
|
117
|
+
|
118
|
+
# Builds gene_pathway: for every pathway in the source that lists genes, its
# id, name and gene names joined by "|".
#
# This task used to be defined twice. The first definition parsed the source
# and discarded the result, and Rake would have run both actions; only the
# definition that writes the file is kept.
file :gene_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
    else
      type, code, name = line.split(/\t/)
      next unless type =='Gene'
      # Skip entries that appear before any pathway header; there is no
      # pathway to attach them to.
      next if last_pathway.nil?
      pathways[last_pathway][:genes] ||= []
      pathways[last_pathway][:genes] << name
    end
  end

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
    pathways.each do |pathway, info|
      # Pathways without genes are left out of the file.
      next if info[:genes].nil?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
    end
  end
end
|
159
|
+
|
160
|
+
# Builds pathway_drugs: for every pathway in the source that lists drugs,
# its id and the drug codes joined by "|".
file :pathway_drugs => 'source/pathways' do |t|
  by_pathway = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |row|
    if row =~ /(P.*):(.*)/
      # Header line: start a new pathway record.
      current = $1
      by_pathway[current] = {:name => $2}
      next
    end

    kind, code, _label = row.split(/\t/)
    next unless kind =='Drug'
    by_pathway[current][:drugs] ||= []
    by_pathway[current][:drugs] << code
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"].join("\t")
    by_pathway.each do |id, details|
      drugs = details[:drugs]
      next if drugs.nil?
      out.puts "#{ id }\t#{drugs * "|"}"
    end
  end
end
|
184
|
+
|
185
|
+
|
186
|
+
# Builds disease_pathway: for every pathway in the source that lists
# diseases, its id, name and the disease names joined by "|".
file :disease_pathway => 'source/pathways' do |t|
  by_pathway = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |row|
    if row =~ /(P.*):(.*)/
      # Header line: start a new pathway record.
      current = $1
      by_pathway[current] = {:name => $2}
      next
    end

    kind, _code, label = row.split(/\t/)
    next unless kind =='Disease'
    by_pathway[current][:diseases] ||= []
    by_pathway[current][:diseases] << label
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + %w(ID Name Diseases).join("\t")
    by_pathway.each do |id, details|
      diseases = details[:diseases]
      next if diseases.nil?
      out.puts "#{ id }\t#{details[:name]}\t#{diseases * "|"}"
    end
  end
end
|
210
|
+
|
211
|
+
# File tasks to include in the defaults. The drug task in this Rakefile is
# named :pathway_drugs; the previous entry :drug_pathway matched no task.
add_to_defaults [:gene_pathway, :pathway_drugs, :disease_pathway]
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Source file for the Pina tasks: local name => download location.
define_source_tasks({
  "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
})
|
4
|
+
|
5
|
+
# Line clean-up for the PINA file: drops the "uniprotkb:", "(gene name)" and
# "pubmed:" markers, turns "|" into ";;" and removes any remaining
# parenthesised text.
pina_line_fix = lambda do |l|
  cleaned = l.gsub("uniprotkb:", '')
  cleaned = cleaned.gsub("(gene name)", '')
  cleaned = cleaned.gsub("pubmed:", '')
  cleaned = cleaned.gsub("|", ';;')
  cleaned.gsub(/\([^)]+\)/, '')
end

process_tsv(:protein_protein, 'Homo sapiens-20110628.txt',
            :key         => 0,
            :fix         => pina_line_fix,
            :fields      => [1,6,8],
            :header_hash => "#",
            :merge       => true,
            :keep_empty  => true) do
  headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
end
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Source files for the STITCH tasks: local name => download location.
define_source_tasks({
  "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals"         => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
})
|
5
|
+
|
6
|
+
# Builds protein_chemical from the lines containing "9606.", keyed by
# column 1, with the "9606." prefix removed.
process_tsv :protein_chemical, 'protein_chemicals',
  :key => 1,
  # Single quotes keep the backslash so the dot is escaped in the pattern;
  # in double quotes "9606\." is just "9606." and the dot matches any
  # character. The STRING Rakefile writes its pattern the same way.
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
end
|
14
|
+
|
15
|
+
# Pattern list handed to the chemicals task as :grep. It starts empty and the
# same array object is filled in place inside the block below.
# NOTE(review): presumably process_tsv reads :grep only after running the
# block — confirm in rake_helper.
$grep_re = []

process_tsv(:chemicals, 'chemicals', :grep => $grep_re, :key => 0) do
  # The chemical ids come from the protein_chemical file, so build it first.
  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  linked_chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace linked_chemicals

  headers ['STITCH Chemical ID', 'Name', 'Source']
end
|
30
|
+
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
# Source file for the STRING tasks: local name => download location.
define_source_tasks({
  "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
})
|
4
|
+
|
5
|
+
# Builds protein_protein from the lines matching '9606\.ENSP', with every
# "9606." prefix removed.
string_line_fix = lambda { |l| l.gsub(/9606\./, '') }

process_tsv(:protein_protein, 'protein_protein',
            :grep  => '9606\.ENSP',
            :fix   => string_line_fix,
            :merge => true,
            :sep   => "\s") do
  headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
end
|
8
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -105,24 +105,34 @@ files:
|
|
105
105
|
- lib/rbbt/sources/go.rb
|
106
106
|
- lib/rbbt/sources/gscholar.rb
|
107
107
|
- lib/rbbt/sources/jochem.rb
|
108
|
+
- lib/rbbt/sources/kegg.rb
|
109
|
+
- lib/rbbt/sources/matador.rb
|
108
110
|
- lib/rbbt/sources/organism.rb
|
109
111
|
- lib/rbbt/sources/pfam.rb
|
112
|
+
- lib/rbbt/sources/pharmagkb.rb
|
113
|
+
- lib/rbbt/sources/pina.rb
|
110
114
|
- lib/rbbt/sources/polysearch.rb
|
111
115
|
- lib/rbbt/sources/pubmed.rb
|
112
116
|
- lib/rbbt/sources/reactome.rb
|
117
|
+
- lib/rbbt/sources/stitch.rb
|
118
|
+
- lib/rbbt/sources/string.rb
|
113
119
|
- lib/rbbt/sources/tfacts.rb
|
114
120
|
- lib/rbbt/sources/uniprot.rb
|
115
121
|
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
116
122
|
- share/Ensembl/release_dates
|
117
123
|
- share/install/Genomes1000/Rakefile
|
118
124
|
- share/install/JoChem/Rakefile
|
125
|
+
- share/install/KEGG/Rakefile
|
119
126
|
- share/install/NCI/Rakefile
|
120
127
|
- share/install/Organism/Hsa/Rakefile
|
121
128
|
- share/install/Organism/Mmu/Rakefile
|
122
129
|
- share/install/Organism/Rno/Rakefile
|
123
130
|
- share/install/Organism/Sce/Rakefile
|
124
131
|
- share/install/Organism/organism_helpers.rb
|
132
|
+
- share/install/PharmaGKB/Rakefile
|
133
|
+
- share/install/Pina/Rakefile
|
125
134
|
- share/install/STITCH/Rakefile
|
135
|
+
- share/install/STRING/Rakefile
|
126
136
|
- share/install/lib/helpers.rb
|
127
137
|
- test/rbbt/sources/test_biomart.rb
|
128
138
|
- test/rbbt/sources/test_entrez.rb
|