rbbt-sources 2.1.7 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/kegg.rb +127 -0
- data/lib/rbbt/sources/matador.rb +9 -0
- data/lib/rbbt/sources/pharmagkb.rb +9 -0
- data/lib/rbbt/sources/pina.rb +35 -0
- data/lib/rbbt/sources/stitch.rb +9 -0
- data/lib/rbbt/sources/string.rb +27 -0
- data/share/install/KEGG/Rakefile +114 -0
- data/share/install/PharmaGKB/Rakefile +211 -0
- data/share/install/Pina/Rakefile +16 -0
- data/share/install/STITCH/Rakefile +30 -0
- data/share/install/STRING/Rakefile +8 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00cd4a9602b9ea2637a620b30cd3d48a6d63a9fe
|
4
|
+
data.tar.gz: c282f8c86de5148343e5a83ea524cdc09435b9fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 701a67455ca18d9c705e2c409628cd5463f7449d2ee40ba4d26cce6f203018db21b9c6ee6f233cf8d80e44e28d3ffcfa08e474678b538b8db7cb80c44e5eac5a
|
7
|
+
data.tar.gz: 4bbcf6f222c01c5f3314617ed7c2458b3cebb9d8b3293ac631305ea2c610c935792fe0e5d6a7402f041aab4304e5586a7989f1e2a097b5dc620f0cb7a208250c
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
module KEGG
|
5
|
+
extend Resource
|
6
|
+
self.pkgdir = "phgx"
|
7
|
+
self.subdir = "share/kegg"
|
8
|
+
|
9
|
+
|
10
|
+
KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
11
|
+
|
12
|
+
def self.names
|
13
|
+
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.descriptions
|
17
|
+
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def self.index2genes
|
22
|
+
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.index2ens
|
26
|
+
@@index2ens ||= KEGG.identifiers.index(:persist => true)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.index2kegg
|
30
|
+
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.id2name(id)
|
34
|
+
names[id]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Find the IDs of every pathway whose name starts with +name+, ignoring case.
#
# name - prefix to look for. A nil prefix matches nothing.
#
# Returns the keys of `names` whose value begins with the prefix; empty when
# nothing matches.
def self.name2id(name)
  # The previous version relied on a blanket `rescue []`, which also hid real
  # failures (for instance errors raised while loading `names`). The inputs
  # that rescue was protecting against are now handled explicitly instead.
  return [] if name.nil?

  prefix = name.to_s.downcase
  matching = names.select do |_id, pathway_name|
    # Entries without a name can never match a prefix.
    pathway_name && pathway_name.downcase.start_with?(prefix)
  end

  matching.collect { |id, _pathway_name| id }
end
|
40
|
+
|
41
|
+
|
42
|
+
def self.description(id)
|
43
|
+
descriptions[id]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
if defined? Entity
|
48
|
+
|
49
|
+
module KeggPathway
|
50
|
+
extend Entity
|
51
|
+
self.format = "KEGG Pathway ID"
|
52
|
+
|
53
|
+
self.annotation :organism
|
54
|
+
|
55
|
+
# Decide whether +entity+ satisfies +query+.
#
# query   - value to look for.
# field   - format passed to KeggPathway.setup for the entity.
# options - extra annotation options for KeggPathway.setup; may be nil.
# entity  - pathway identifier being tested.
#
# Returns true when the entity equals the query or when the pathway name
# contains the query, false otherwise.
def self.filter(query, field = nil, options = nil, entity = nil)
  return true if query == entity

  # options defaults to nil, so fall back to an empty hash before merging.
  setup_options = (options || {}).merge(:format => field)
  pathway_name = KeggPathway.setup(entity.dup, setup_options).name

  # The name property yields nil for identifiers without a known name.
  return true if pathway_name && pathway_name.index(query)

  false
end
|
62
|
+
|
63
|
+
property :name => :single2array do
|
64
|
+
return nil if self.nil?
|
65
|
+
name = KEGG.id2name(self)
|
66
|
+
name.sub(/ - Homo.*/,'') unless name.nil?
|
67
|
+
end
|
68
|
+
|
69
|
+
property :description => :single2array do
|
70
|
+
KEGG.description(self)
|
71
|
+
end
|
72
|
+
|
73
|
+
property :genes => :array2single do |*args|
|
74
|
+
organism = args.first || self.organism
|
75
|
+
KEGG.index2genes.values_at(*self).
|
76
|
+
each{|gene| gene.organism = organism if gene.respond_to? :organism }
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
if defined? Gene and Entity === Gene
|
81
|
+
module Gene
|
82
|
+
self.format = "KEGG Gene ID"
|
83
|
+
|
84
|
+
def to_kegg
|
85
|
+
return self if format == "KEGG Gene ID"
|
86
|
+
if Array === self
|
87
|
+
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
88
|
+
else
|
89
|
+
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def from_kegg
|
94
|
+
return self unless format == "KEGG Gene ID"
|
95
|
+
if Array === self
|
96
|
+
Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
97
|
+
else
|
98
|
+
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def self.gene_kegg_pathway_index
|
103
|
+
@@gene_kegg_pathway_index ||=
|
104
|
+
KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
|
105
|
+
end
|
106
|
+
|
107
|
+
property :to => :array2single do |new_format|
|
108
|
+
case
|
109
|
+
when format == new_format
|
110
|
+
self
|
111
|
+
when format == "KEGG Gene ID"
|
112
|
+
ensembl = from_kegg.clean_annotations
|
113
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
114
|
+
when new_format == "KEGG Gene ID"
|
115
|
+
to_kegg
|
116
|
+
else
|
117
|
+
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
property :kegg_pathways => :array2single do
|
122
|
+
@kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
|
123
|
+
each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
|
3
|
+
module Pina
|
4
|
+
extend Resource
|
5
|
+
self.pkgdir = "phgx"
|
6
|
+
self.subdir = "share/pina"
|
7
|
+
|
8
|
+
Pina.claim Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
|
9
|
+
end
|
10
|
+
|
11
|
+
if defined? Entity and defined? Gene and Entity === Gene
|
12
|
+
require 'rbbt/entity/gene'
|
13
|
+
require 'rbbt/entity/interactor'
|
14
|
+
require 'rbbt/sources/PSI_MI'
|
15
|
+
|
16
|
+
module Gene
|
17
|
+
property :pina_interactors => :array2single do
|
18
|
+
ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
|
19
|
+
pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
|
20
|
+
|
21
|
+
int = self.ensembl.collect do |ens|
|
22
|
+
uniprot = ens2uniprot[ens]
|
23
|
+
list = pina.values_at(*uniprot).compact.collect do |v|
|
24
|
+
Misc.zip_fields(v).collect do |o, method, articles|
|
25
|
+
Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
|
26
|
+
end
|
27
|
+
end.flatten.uniq
|
28
|
+
Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
29
|
+
end
|
30
|
+
|
31
|
+
Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'phgx'
|
2
|
+
|
3
|
+
module STRING
|
4
|
+
extend Resource
|
5
|
+
self.pkgdir = "phgx"
|
6
|
+
self.subdir = "share/string"
|
7
|
+
|
8
|
+
STRING.claim STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
|
9
|
+
end
|
10
|
+
|
11
|
+
if defined? Entity and defined? Gene and Entity === Gene
|
12
|
+
module Gene
|
13
|
+
property :string_interactors => :array2single do |*args|
|
14
|
+
threshold = args.first || 800
|
15
|
+
string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
|
16
|
+
all = self.ensembl.collect do |gene|
|
17
|
+
interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq
|
18
|
+
Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
|
19
|
+
end
|
20
|
+
|
21
|
+
all.compact.first.annotate all if Annotated === all.compact.first
|
22
|
+
|
23
|
+
all
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
|
4
|
+
"hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
|
5
|
+
"drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
|
6
|
+
"pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
|
7
|
+
|
8
|
+
|
9
|
+
# Build the Ensembl Gene ID / KEGG Gene ID table from the KEGG gene entries
# dump: each ENTRY line sets the current KEGG gene, and an "Ensembl:" line
# found afterwards links it to an Ensembl gene.
file :identifiers => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil
  # String#each was removed in Ruby 1.9; split into lines explicitly, as the
  # :pathways task in this Rakefile does.
  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    # Skip Ensembl links seen before any ENTRY line; with no current entry the
    # "hsa:" + id concatenation below would raise a TypeError.
    if entry && line =~ /Ensembl: (ENSG\d+)/
      pairs[entry] = $1
    end
  end

  rows = pairs.collect{|kegg_id, ens| [ens, "hsa:" + kegg_id] * "\t"}
  Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + rows * "\n")
end
|
25
|
+
|
26
|
+
file :gene_drug => 'source/drugs' do |t|
|
27
|
+
pairs = {}
|
28
|
+
drug = nil
|
29
|
+
Open.read(t.prerequisites.first).
|
30
|
+
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
31
|
+
if line =~ /^ENTRY\s+(\w+)/
|
32
|
+
drug = $1
|
33
|
+
next
|
34
|
+
end
|
35
|
+
|
36
|
+
if line =~ /TARGET.*?\[HSA:(.*?)\]/
|
37
|
+
genes = $1.split(/\s/)
|
38
|
+
genes.each do |gene|
|
39
|
+
pairs[gene] ||= []
|
40
|
+
pairs[gene] << drug
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
|
46
|
+
end
|
47
|
+
|
48
|
+
file :drugs => 'source/drugs' do |t|
|
49
|
+
info = {}
|
50
|
+
drug = nil
|
51
|
+
Open.read(t.prerequisites.first).
|
52
|
+
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
53
|
+
if line =~ /^ENTRY\s+(\w+)/
|
54
|
+
drug = $1
|
55
|
+
next
|
56
|
+
end
|
57
|
+
|
58
|
+
if line =~ /^NAME(.*)/
|
59
|
+
names = $1.split(/;/)
|
60
|
+
names.each do |name|
|
61
|
+
info[drug] ||= [[],[]]
|
62
|
+
info[drug][0] << name.chomp.strip
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
if line =~ /^DBLINKS(.*)/
|
67
|
+
$1.match(/PubChem: (\d*)/)
|
68
|
+
pubchem = $1
|
69
|
+
next unless pubchem
|
70
|
+
info[drug] ||= [[],[]]
|
71
|
+
info[drug][1] << pubchem.chomp.strip
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + info.collect{|drug, info| [drug, info.collect{|v| v * "|"} ].flatten * "\t"} * "\n")
|
76
|
+
end
|
77
|
+
|
78
|
+
file :pathways => 'source/pathways' do |t|
|
79
|
+
descs = {}
|
80
|
+
names = {}
|
81
|
+
klass = {}
|
82
|
+
pathway = nil
|
83
|
+
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
84
|
+
if line =~ /ENTRY\s+(\w+)/
|
85
|
+
pathway = $1.strip
|
86
|
+
end
|
87
|
+
|
88
|
+
if line =~ /NAME (.*)/
|
89
|
+
names[pathway] = $1.strip
|
90
|
+
end
|
91
|
+
|
92
|
+
if line =~ /DESCRIPTION (.*)/
|
93
|
+
descs[pathway] = $1.strip
|
94
|
+
end
|
95
|
+
|
96
|
+
if line =~ /CLASS (.*)/
|
97
|
+
klass[pathway] = $1.strip
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n")
|
102
|
+
end
|
103
|
+
|
104
|
+
process_tsv :gene_pathway, 'hsa_gene_map.tab',
|
105
|
+
:sep2 => ' ' do
|
106
|
+
headers ['KEGG Gene ID', 'KEGG Pathway ID']
|
107
|
+
data do |gene, pathway|
|
108
|
+
"hsa:#{ gene }\t#{pathway.flatten.collect{|name| "hsa" + name} * "|"}"
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# No :genes task is declared in this Rakefile; the gene table is :identifiers.
add_to_defaults [:pathways, :drugs, :gene_drug, :identifiers]
|
113
|
+
|
114
|
+
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
|
4
|
+
"drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
|
5
|
+
"diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
|
6
|
+
"relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
|
7
|
+
"variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
|
8
|
+
"pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
|
9
|
+
|
10
|
+
|
11
|
+
process_tsv :diseases, 'diseases',
|
12
|
+
:header_hash => "",
|
13
|
+
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
14
|
+
headers ['PhGKB Disease ID']
|
15
|
+
end
|
16
|
+
|
17
|
+
process_tsv :identifiers, 'genes',
|
18
|
+
:header_hash => "",
|
19
|
+
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
20
|
+
headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
|
21
|
+
end
|
22
|
+
|
23
|
+
process_tsv :drugs, 'drugs',
|
24
|
+
:header_hash => "",
|
25
|
+
:fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
|
26
|
+
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
27
|
+
headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
|
28
|
+
end
|
29
|
+
|
30
|
+
process_tsv :relationships, 'relationships',
|
31
|
+
:header_hash => "",
|
32
|
+
:merge => true,
|
33
|
+
:fix => proc{|l|
|
34
|
+
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
35
|
+
parts = l.split("\t")
|
36
|
+
rels = parts.pop
|
37
|
+
parts = [parts.values_at(0, 2) * ":"]
|
38
|
+
pmids = []
|
39
|
+
pathways = []
|
40
|
+
rsids = []
|
41
|
+
rels.split(',').each do |r|
|
42
|
+
case
|
43
|
+
when r =~ /PMID:(.*)/
|
44
|
+
pmids << $1
|
45
|
+
when r =~ /Pathway:(.*)/
|
46
|
+
pathways << $1
|
47
|
+
when r =~ /RSID:(.*)/
|
48
|
+
rsids << $1
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
parts << pmids * "|"
|
53
|
+
parts << pathways * "|"
|
54
|
+
parts << rsids * "|"
|
55
|
+
|
56
|
+
parts * "\t"
|
57
|
+
},
|
58
|
+
:keep_empty => true do
|
59
|
+
|
60
|
+
headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
process_tsv :gene_drug, 'relationships',
|
65
|
+
:select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
|
66
|
+
:header_hash => "",
|
67
|
+
:merge => true,
|
68
|
+
:fix => proc{|l|
|
69
|
+
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
70
|
+
parts = l.split("\t")
|
71
|
+
rels = parts.pop
|
72
|
+
parts = parts.values_at 0, 2
|
73
|
+
|
74
|
+
parts * "\t"
|
75
|
+
},
|
76
|
+
:keep_empty => true do
|
77
|
+
|
78
|
+
headers ['PhGKB Gene ID', 'PhGKB Drug ID']
|
79
|
+
end
|
80
|
+
|
81
|
+
# Gene/disease associations taken from the relationships dump: only lines
# that start with a gene and mention a disease are kept.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
  :key_field => 1,
  :fields => 3,
  :merge => true,
  :header_hash => "",
  # Strip the whole "Disease:" prefix, colon included, like the
  # :relationships and :gene_drug fixes do; without the colon every disease
  # ID kept a stray leading ":".
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end
|
92
|
+
|
93
|
+
# Variant annotations, keyed by the second source column and re-headed below.
process_tsv :variants, 'variants',
  :key_field => 1,
  :fields => [3,7,8,9,10,4,6,5],
  :header_hash => "",
  :merge => true,
  # Remove only the "Gene:", "Drug:" and "Disease:" prefixes. Matching the
  # bare word "Disease" left a stray ":" behind and also deleted the word
  # wherever else it appeared in a field.
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
end
|
103
|
+
|
104
|
+
file :pathways => 'source/pathways' do |t|
|
105
|
+
File.open(t.name, 'w') do |f|
|
106
|
+
f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
|
107
|
+
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
108
|
+
case
|
109
|
+
when line =~ /(PA\d+): (.*) - \((.*)\)/
|
110
|
+
f.puts [$1,$2,$3] * "\t"
|
111
|
+
when line =~ /(PA\d+): (.*)/
|
112
|
+
f.puts [$1,$2,""] * "\t"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
# Write one row per pathway that lists genes: pathway ID, pathway name and
# the "|"-joined gene names.
#
# This task used to be declared twice. The first declaration parsed the source
# and discarded the result, so the file was read and parsed twice to no
# effect. Only the declaration that writes the output is kept.
file :gene_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      # Header line: following member lines belong to this pathway.
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
    else
      # Member line: type, code and name separated by tabs.
      type, _code, name = line.split(/\t/)
      next unless type =='Gene'
      pathways[last_pathway][:genes] ||= []
      pathways[last_pathway][:genes] << name
    end
  end

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
    pathways.each do |pathway, info|
      # Pathways with no gene members produce no row.
      next if info[:genes].nil?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
    end
  end
end
|
159
|
+
|
160
|
+
file :pathway_drugs => 'source/pathways' do |t|
|
161
|
+
pathways = {}
|
162
|
+
last_pathway = nil
|
163
|
+
|
164
|
+
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
165
|
+
if line =~ /(P.*):(.*)/
|
166
|
+
last_pathway = $1
|
167
|
+
pathways[last_pathway] = {:name => $2}
|
168
|
+
else
|
169
|
+
type, code, name = line.split(/\t/)
|
170
|
+
next unless type =='Drug'
|
171
|
+
pathways[last_pathway][:drugs] ||= []
|
172
|
+
pathways[last_pathway][:drugs] << code
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
File.open(t.name, 'w') do |f|
|
177
|
+
f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
|
178
|
+
pathways.each do |pathway, info|
|
179
|
+
next if info[:drugs].nil?
|
180
|
+
f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
|
186
|
+
file :disease_pathway => 'source/pathways' do |t|
|
187
|
+
pathways = {}
|
188
|
+
last_pathway = nil
|
189
|
+
|
190
|
+
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
191
|
+
if line =~ /(P.*):(.*)/
|
192
|
+
last_pathway = $1
|
193
|
+
pathways[last_pathway] = {:name => $2}
|
194
|
+
else
|
195
|
+
type, code, name = line.split(/\t/)
|
196
|
+
next unless type =='Disease'
|
197
|
+
pathways[last_pathway][:diseases] ||= []
|
198
|
+
pathways[last_pathway][:diseases] << name
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
File.open(t.name, 'w') do |f|
|
203
|
+
f.puts "#" + %w(ID Name Diseases) * "\t"
|
204
|
+
pathways.each do |pathway, info|
|
205
|
+
next if info[:diseases].nil?
|
206
|
+
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:diseases] * "|"}"
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# The drug task declared in this Rakefile is :pathway_drugs, not :drug_pathway.
add_to_defaults [:gene_pathway, :pathway_drugs, :disease_pathway]
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
|
4
|
+
|
5
|
+
process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
|
6
|
+
:key => 0,
|
7
|
+
:fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
|
8
|
+
:fields => [1,6,8],
|
9
|
+
:header_hash => "#",
|
10
|
+
:merge => true,
|
11
|
+
:keep_empty => true do
|
12
|
+
|
13
|
+
headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
|
14
|
+
end
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
|
4
|
+
"chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
|
5
|
+
|
6
|
+
# Human (taxon 9606) protein/chemical links, with the taxon prefix removed
# from the protein identifiers.
process_tsv :protein_chemical, 'protein_chemicals',
  :key => 1,
  # Single quotes keep the backslash so the dot is matched literally; inside
  # double quotes "\." collapses to ".", which matches any character.
  # NOTE(review): assumes :grep is treated as a regular expression — confirm
  # against rake_helper.
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
end
|
14
|
+
|
15
|
+
$grep_re = []
|
16
|
+
process_tsv :chemicals, 'chemicals',
|
17
|
+
:grep => $grep_re,
|
18
|
+
:key => 0 do
|
19
|
+
|
20
|
+
Rake::Task['protein_chemical'].invoke
|
21
|
+
|
22
|
+
Log.debug "Getting chemicals"
|
23
|
+
chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
|
24
|
+
Log.debug "Getting chemicals [done]"
|
25
|
+
|
26
|
+
$grep_re.replace chemicals
|
27
|
+
|
28
|
+
headers ['STITCH Chemical ID', 'Name', 'Source']
|
29
|
+
end
|
30
|
+
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
+
|
3
|
+
define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
|
4
|
+
|
5
|
+
process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
|
6
|
+
headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
|
7
|
+
end
|
8
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -105,24 +105,34 @@ files:
|
|
105
105
|
- lib/rbbt/sources/go.rb
|
106
106
|
- lib/rbbt/sources/gscholar.rb
|
107
107
|
- lib/rbbt/sources/jochem.rb
|
108
|
+
- lib/rbbt/sources/kegg.rb
|
109
|
+
- lib/rbbt/sources/matador.rb
|
108
110
|
- lib/rbbt/sources/organism.rb
|
109
111
|
- lib/rbbt/sources/pfam.rb
|
112
|
+
- lib/rbbt/sources/pharmagkb.rb
|
113
|
+
- lib/rbbt/sources/pina.rb
|
110
114
|
- lib/rbbt/sources/polysearch.rb
|
111
115
|
- lib/rbbt/sources/pubmed.rb
|
112
116
|
- lib/rbbt/sources/reactome.rb
|
117
|
+
- lib/rbbt/sources/stitch.rb
|
118
|
+
- lib/rbbt/sources/string.rb
|
113
119
|
- lib/rbbt/sources/tfacts.rb
|
114
120
|
- lib/rbbt/sources/uniprot.rb
|
115
121
|
- lib/rbbt/sources/wgEncodeBroadHmm.rb
|
116
122
|
- share/Ensembl/release_dates
|
117
123
|
- share/install/Genomes1000/Rakefile
|
118
124
|
- share/install/JoChem/Rakefile
|
125
|
+
- share/install/KEGG/Rakefile
|
119
126
|
- share/install/NCI/Rakefile
|
120
127
|
- share/install/Organism/Hsa/Rakefile
|
121
128
|
- share/install/Organism/Mmu/Rakefile
|
122
129
|
- share/install/Organism/Rno/Rakefile
|
123
130
|
- share/install/Organism/Sce/Rakefile
|
124
131
|
- share/install/Organism/organism_helpers.rb
|
132
|
+
- share/install/PharmaGKB/Rakefile
|
133
|
+
- share/install/Pina/Rakefile
|
125
134
|
- share/install/STITCH/Rakefile
|
135
|
+
- share/install/STRING/Rakefile
|
126
136
|
- share/install/lib/helpers.rb
|
127
137
|
- test/rbbt/sources/test_biomart.rb
|
128
138
|
- test/rbbt/sources/test_entrez.rb
|