rbbt-phgx 2.1.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- metadata +18 -28
- data/lib/rbbt/sources/kegg.rb +0 -127
- data/lib/rbbt/sources/matador.rb +0 -9
- data/lib/rbbt/sources/pharmagkb.rb +0 -9
- data/lib/rbbt/sources/pina.rb +0 -35
- data/lib/rbbt/sources/stitch.rb +0 -9
- data/lib/rbbt/sources/string.rb +0 -27
- data/share/install/KEGG/Rakefile +0 -114
- data/share/install/PharmaGKB/Rakefile +0 -211
- data/share/install/Pina/Rakefile +0 -16
- data/share/install/STITCH/Rakefile +0 -30
- data/share/install/STRING/Rakefile +0 -8
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
YmM0ZWI0YTU0Y2Q5MmE0YmM4ZmUwN2JiODJjNzRiNDUyMGVhZWU1YQ==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 36529e9255e02520dc576429de183175c52b2e30
|
4
|
+
data.tar.gz: 95a9925f8c9c79c2ea65e1effe28964d3ece20d8
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
OWU2MTdlMGJhNTFjNzlhOTYyNjA1ZDFkNjM4ZTczZTAyNmFkNzMxZWFjZjBk
|
11
|
-
YmEwM2U1MmViNzU2OGE4ZGI3Njk3NTYyMmZjMzVjZDA5MjJkODk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
YWU2NGJiMmE4MmVkNjYxZmJkYmVjODc0OGJhZGYwNTliN2QwMmVhZDNlMDJh
|
14
|
-
MjJkMmUxYmViNmY3MGNhYTVlMTZiNmUyMDQxYTRiOGUzYTVhNTI2MWIzMzk2
|
15
|
-
NmE4OTg0ZDRjOGYxYjQ1YjE5NTI4Y2M1ZWI5NmVlNDczYjU2MzE=
|
6
|
+
metadata.gz: 860587fb353eb97f5bb7e6515e030e9590d258533c1e0698896f416b7bab70db5214581c7d1d2cb0d842d91b4e3100d4c47a0faa57c0d42356b39f386d5f1745
|
7
|
+
data.tar.gz: 5c1f9ca7ce18f225aa5a8c013067f3e665a26c97284922e7138b22b29ef7d913b92f1a871c9b38a7c73eef4bbf0a814260d60e8ebae63b3d9d984b016c731780
|
metadata
CHANGED
@@ -1,30 +1,31 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-phgx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 4.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
-
description: Pharmaco-genomics related data sources
|
27
|
+
description: Pharmaco-genomics related data sources. Deprecated. Relevant files moved
|
28
|
+
to rbbt-sources for the time been
|
28
29
|
email: miguel.vazquez@fdi.ucm.es
|
29
30
|
executables: []
|
30
31
|
extensions: []
|
@@ -43,24 +44,13 @@ files:
|
|
43
44
|
- lib/rbbt/sources/biogrid.rb
|
44
45
|
- lib/rbbt/sources/cancer.rb
|
45
46
|
- lib/rbbt/sources/dbsnp.rb
|
46
|
-
- lib/rbbt/sources/kegg.rb
|
47
|
-
- lib/rbbt/sources/matador.rb
|
48
|
-
- lib/rbbt/sources/pharmagkb.rb
|
49
|
-
- lib/rbbt/sources/pina.rb
|
50
|
-
- lib/rbbt/sources/stitch.rb
|
51
|
-
- lib/rbbt/sources/string.rb
|
52
47
|
- share/Cancer/anais_annotations
|
53
48
|
- share/Cancer/anais_interactions
|
54
49
|
- share/Cancer/cancer_genes.tsv
|
55
50
|
- share/install/Biogrid/Rakefile
|
56
51
|
- share/install/DBSNP/Rakefile
|
57
|
-
- share/install/KEGG/Rakefile
|
58
52
|
- share/install/Matador/Rakefile
|
59
53
|
- share/install/NCI/Rakefile
|
60
|
-
- share/install/PharmaGKB/Rakefile
|
61
|
-
- share/install/Pina/Rakefile
|
62
|
-
- share/install/STITCH/Rakefile
|
63
|
-
- share/install/STRING/Rakefile
|
64
54
|
- share/install/lib/rake_helper.rb
|
65
55
|
- test/rbbt/mutation/test_chasm.rb
|
66
56
|
- test/rbbt/mutation/test_fireDB.rb
|
@@ -86,32 +76,32 @@ require_paths:
|
|
86
76
|
- lib
|
87
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
88
78
|
requirements:
|
89
|
-
- -
|
79
|
+
- - ">="
|
90
80
|
- !ruby/object:Gem::Version
|
91
81
|
version: '0'
|
92
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
83
|
requirements:
|
94
|
-
- -
|
84
|
+
- - ">="
|
95
85
|
- !ruby/object:Gem::Version
|
96
86
|
version: '0'
|
97
87
|
requirements: []
|
98
88
|
rubyforge_project:
|
99
|
-
rubygems_version: 2.2.
|
89
|
+
rubygems_version: 2.2.1
|
100
90
|
signing_key:
|
101
91
|
specification_version: 4
|
102
|
-
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
92
|
+
summary: DEPRECATED -- Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
103
93
|
test_files:
|
104
|
-
- test/rbbt/sources/test_matador.rb
|
105
|
-
- test/rbbt/sources/test_cancer.rb
|
106
|
-
- test/rbbt/sources/test_stitch.rb
|
107
|
-
- test/rbbt/sources/test_pharmagkb.rb
|
108
|
-
- test/rbbt/sources/test_kegg.rb
|
109
|
-
- test/rbbt/mutation/test_oncodriveFM.rb
|
110
|
-
- test/rbbt/mutation/test_polyphen.rb
|
111
94
|
- test/rbbt/mutation/test_chasm.rb
|
112
|
-
- test/rbbt/mutation/test_sift.rb
|
113
95
|
- test/rbbt/mutation/test_transFIC.rb
|
114
96
|
- test/rbbt/mutation/test_snps_and_go.rb
|
97
|
+
- test/rbbt/mutation/test_sift.rb
|
98
|
+
- test/rbbt/mutation/test_oncodriveFM.rb
|
99
|
+
- test/rbbt/mutation/test_polyphen.rb
|
115
100
|
- test/rbbt/mutation/test_mutation_assessor.rb
|
116
101
|
- test/rbbt/mutation/test_fireDB.rb
|
102
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
103
|
+
- test/rbbt/sources/test_kegg.rb
|
104
|
+
- test/rbbt/sources/test_matador.rb
|
105
|
+
- test/rbbt/sources/test_cancer.rb
|
106
|
+
- test/rbbt/sources/test_stitch.rb
|
117
107
|
- test/test_helper.rb
|
data/lib/rbbt/sources/kegg.rb
DELETED
@@ -1,127 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
require 'rbbt/resource'
|
3
|
-
|
4
|
-
module KEGG
|
5
|
-
extend Resource
|
6
|
-
self.pkgdir = "phgx"
|
7
|
-
self.subdir = "share/kegg"
|
8
|
-
|
9
|
-
|
10
|
-
KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
11
|
-
|
12
|
-
def self.names
|
13
|
-
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.descriptions
|
17
|
-
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
|
18
|
-
end
|
19
|
-
|
20
|
-
|
21
|
-
def self.index2genes
|
22
|
-
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.index2ens
|
26
|
-
@@index2ens ||= KEGG.identifiers.index(:persist => true)
|
27
|
-
end
|
28
|
-
|
29
|
-
def self.index2kegg
|
30
|
-
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
|
31
|
-
end
|
32
|
-
|
33
|
-
def self.id2name(id)
|
34
|
-
names[id]
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.name2id(name)
|
38
|
-
names.select{|id,n| n.downcase.index(name.downcase) == 0}.collect{|id,n| id} rescue []
|
39
|
-
end
|
40
|
-
|
41
|
-
|
42
|
-
def self.description(id)
|
43
|
-
descriptions[id]
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
if defined? Entity
|
48
|
-
|
49
|
-
module KeggPathway
|
50
|
-
extend Entity
|
51
|
-
self.format = "KEGG Pathway ID"
|
52
|
-
|
53
|
-
self.annotation :organism
|
54
|
-
|
55
|
-
def self.filter(query, field = nil, options = nil, entity = nil)
|
56
|
-
return true if query == entity
|
57
|
-
|
58
|
-
return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
|
59
|
-
|
60
|
-
false
|
61
|
-
end
|
62
|
-
|
63
|
-
property :name => :single2array do
|
64
|
-
return nil if self.nil?
|
65
|
-
name = KEGG.id2name(self)
|
66
|
-
name.sub(/ - Homo.*/,'') unless name.nil?
|
67
|
-
end
|
68
|
-
|
69
|
-
property :description => :single2array do
|
70
|
-
KEGG.description(self)
|
71
|
-
end
|
72
|
-
|
73
|
-
property :genes => :array2single do |*args|
|
74
|
-
organism = args.first || self.organism
|
75
|
-
KEGG.index2genes.values_at(*self).
|
76
|
-
each{|gene| gene.organism = organism if gene.respond_to? :organism }
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
if defined? Gene and Entity === Gene
|
81
|
-
module Gene
|
82
|
-
self.format = "KEGG Gene ID"
|
83
|
-
|
84
|
-
def to_kegg
|
85
|
-
return self if format == "KEGG Gene ID"
|
86
|
-
if Array === self
|
87
|
-
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
88
|
-
else
|
89
|
-
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def from_kegg
|
94
|
-
return self unless format == "KEGG Gene ID"
|
95
|
-
if Array === self
|
96
|
-
Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
97
|
-
else
|
98
|
-
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.gene_kegg_pathway_index
|
103
|
-
@@gene_kegg_pathway_index ||=
|
104
|
-
KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
|
105
|
-
end
|
106
|
-
|
107
|
-
property :to => :array2single do |new_format|
|
108
|
-
case
|
109
|
-
when format == new_format
|
110
|
-
self
|
111
|
-
when format == "KEGG Gene ID"
|
112
|
-
ensembl = from_kegg.clean_annotations
|
113
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
114
|
-
when new_format == "KEGG Gene ID"
|
115
|
-
to_kegg
|
116
|
-
else
|
117
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
property :kegg_pathways => :array2single do
|
122
|
-
@kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
|
123
|
-
each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
data/lib/rbbt/sources/matador.rb
DELETED
data/lib/rbbt/sources/pharmagkb.rb
DELETED
data/lib/rbbt/sources/pina.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
require 'phgx'
|
2
|
-
|
3
|
-
module Pina
|
4
|
-
extend Resource
|
5
|
-
self.pkgdir = "phgx"
|
6
|
-
self.subdir = "share/pina"
|
7
|
-
|
8
|
-
Pina.claim Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
|
9
|
-
end
|
10
|
-
|
11
|
-
if defined? Entity and defined? Gene and Entity === Gene
|
12
|
-
require 'rbbt/entity/gene'
|
13
|
-
require 'rbbt/entity/interactor'
|
14
|
-
require 'rbbt/sources/PSI_MI'
|
15
|
-
|
16
|
-
module Gene
|
17
|
-
property :pina_interactors => :array2single do
|
18
|
-
ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
|
19
|
-
pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
|
20
|
-
|
21
|
-
int = self.ensembl.collect do |ens|
|
22
|
-
uniprot = ens2uniprot[ens]
|
23
|
-
list = pina.values_at(*uniprot).compact.collect do |v|
|
24
|
-
Misc.zip_fields(v).collect do |o, method, articles|
|
25
|
-
Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
|
26
|
-
end
|
27
|
-
end.flatten.uniq
|
28
|
-
Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
29
|
-
end
|
30
|
-
|
31
|
-
Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
data/lib/rbbt/sources/stitch.rb
DELETED
data/lib/rbbt/sources/string.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'phgx'
|
2
|
-
|
3
|
-
module STRING
|
4
|
-
extend Resource
|
5
|
-
self.pkgdir = "phgx"
|
6
|
-
self.subdir = "share/string"
|
7
|
-
|
8
|
-
STRING.claim STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
|
9
|
-
end
|
10
|
-
|
11
|
-
if defined? Entity and defined? Gene and Entity === Gene
|
12
|
-
module Gene
|
13
|
-
property :string_interactors => :array2single do |*args|
|
14
|
-
threshold = args.first || 800
|
15
|
-
string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
|
16
|
-
all = self.ensembl.collect do |gene|
|
17
|
-
interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq
|
18
|
-
Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
|
19
|
-
end
|
20
|
-
|
21
|
-
all.compact.first.annotate all if Annotated === all.compact.first
|
22
|
-
|
23
|
-
all
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
data/share/install/KEGG/Rakefile
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
|
4
|
-
"hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
|
5
|
-
"drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
|
6
|
-
"pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
|
7
|
-
|
8
|
-
|
9
|
-
file :identifiers => 'source/h.sapiens' do |t|
|
10
|
-
pairs = {}
|
11
|
-
entry = nil
|
12
|
-
Open.read(t.prerequisites.first).each do |line|
|
13
|
-
if line =~ /^ENTRY\s+(\d+)/
|
14
|
-
entry = $1
|
15
|
-
next
|
16
|
-
end
|
17
|
-
|
18
|
-
if line =~ /Ensembl: (ENSG\d+)/
|
19
|
-
pairs[entry] = $1
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + pairs.collect{|entry, ens| [ens, "hsa:" + entry] * "\t"} * "\n")
|
24
|
-
end
|
25
|
-
|
26
|
-
file :gene_drug => 'source/drugs' do |t|
|
27
|
-
pairs = {}
|
28
|
-
drug = nil
|
29
|
-
Open.read(t.prerequisites.first).
|
30
|
-
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
31
|
-
if line =~ /^ENTRY\s+(\w+)/
|
32
|
-
drug = $1
|
33
|
-
next
|
34
|
-
end
|
35
|
-
|
36
|
-
if line =~ /TARGET.*?\[HSA:(.*?)\]/
|
37
|
-
genes = $1.split(/\s/)
|
38
|
-
genes.each do |gene|
|
39
|
-
pairs[gene] ||= []
|
40
|
-
pairs[gene] << drug
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
|
46
|
-
end
|
47
|
-
|
48
|
-
file :drugs => 'source/drugs' do |t|
|
49
|
-
info = {}
|
50
|
-
drug = nil
|
51
|
-
Open.read(t.prerequisites.first).
|
52
|
-
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
53
|
-
if line =~ /^ENTRY\s+(\w+)/
|
54
|
-
drug = $1
|
55
|
-
next
|
56
|
-
end
|
57
|
-
|
58
|
-
if line =~ /^NAME(.*)/
|
59
|
-
names = $1.split(/;/)
|
60
|
-
names.each do |name|
|
61
|
-
info[drug] ||= [[],[]]
|
62
|
-
info[drug][0] << name.chomp.strip
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
if line =~ /^DBLINKS(.*)/
|
67
|
-
$1.match(/PubChem: (\d*)/)
|
68
|
-
pubchem = $1
|
69
|
-
next unless pubchem
|
70
|
-
info[drug] ||= [[],[]]
|
71
|
-
info[drug][1] << pubchem.chomp.strip
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + info.collect{|drug, info| [drug, info.collect{|v| v * "|"} ].flatten * "\t"} * "\n")
|
76
|
-
end
|
77
|
-
|
78
|
-
file :pathways => 'source/pathways' do |t|
|
79
|
-
descs = {}
|
80
|
-
names = {}
|
81
|
-
klass = {}
|
82
|
-
pathway = nil
|
83
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
84
|
-
if line =~ /ENTRY\s+(\w+)/
|
85
|
-
pathway = $1.strip
|
86
|
-
end
|
87
|
-
|
88
|
-
if line =~ /NAME (.*)/
|
89
|
-
names[pathway] = $1.strip
|
90
|
-
end
|
91
|
-
|
92
|
-
if line =~ /DESCRIPTION (.*)/
|
93
|
-
descs[pathway] = $1.strip
|
94
|
-
end
|
95
|
-
|
96
|
-
if line =~ /CLASS (.*)/
|
97
|
-
klass[pathway] = $1.strip
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n")
|
102
|
-
end
|
103
|
-
|
104
|
-
process_tsv :gene_pathway, 'hsa_gene_map.tab',
|
105
|
-
:sep2 => ' ' do
|
106
|
-
headers ['KEGG Gene ID', 'KEGG Pathway ID']
|
107
|
-
data do |gene, pathway|
|
108
|
-
"hsa:#{ gene }\t#{pathway.flatten.collect{|name| "hsa" + name} * "|"}"
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
|
113
|
-
|
114
|
-
|
data/share/install/PharmaGKB/Rakefile
DELETED
@@ -1,211 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
|
4
|
-
"drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
|
5
|
-
"diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
|
6
|
-
"relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
|
7
|
-
"variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
|
8
|
-
"pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
|
9
|
-
|
10
|
-
|
11
|
-
process_tsv :diseases, 'diseases',
|
12
|
-
:header_hash => "",
|
13
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
14
|
-
headers ['PhGKB Disease ID']
|
15
|
-
end
|
16
|
-
|
17
|
-
process_tsv :identifiers, 'genes',
|
18
|
-
:header_hash => "",
|
19
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
20
|
-
headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
|
21
|
-
end
|
22
|
-
|
23
|
-
process_tsv :drugs, 'drugs',
|
24
|
-
:header_hash => "",
|
25
|
-
:fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
|
26
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
27
|
-
headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
|
28
|
-
end
|
29
|
-
|
30
|
-
process_tsv :relationships, 'relationships',
|
31
|
-
:header_hash => "",
|
32
|
-
:merge => true,
|
33
|
-
:fix => proc{|l|
|
34
|
-
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
35
|
-
parts = l.split("\t")
|
36
|
-
rels = parts.pop
|
37
|
-
parts = [parts.values_at(0, 2) * ":"]
|
38
|
-
pmids = []
|
39
|
-
pathways = []
|
40
|
-
rsids = []
|
41
|
-
rels.split(',').each do |r|
|
42
|
-
case
|
43
|
-
when r =~ /PMID:(.*)/
|
44
|
-
pmids << $1
|
45
|
-
when r =~ /Pathway:(.*)/
|
46
|
-
pathways << $1
|
47
|
-
when r =~ /RSID:(.*)/
|
48
|
-
rsids << $1
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
parts << pmids * "|"
|
53
|
-
parts << pathways * "|"
|
54
|
-
parts << rsids * "|"
|
55
|
-
|
56
|
-
parts * "\t"
|
57
|
-
},
|
58
|
-
:keep_empty => true do
|
59
|
-
|
60
|
-
headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
|
61
|
-
end
|
62
|
-
|
63
|
-
|
64
|
-
process_tsv :gene_drug, 'relationships',
|
65
|
-
:select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
|
66
|
-
:header_hash => "",
|
67
|
-
:merge => true,
|
68
|
-
:fix => proc{|l|
|
69
|
-
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
70
|
-
parts = l.split("\t")
|
71
|
-
rels = parts.pop
|
72
|
-
parts = parts.values_at 0, 2
|
73
|
-
|
74
|
-
parts * "\t"
|
75
|
-
},
|
76
|
-
:keep_empty => true do
|
77
|
-
|
78
|
-
headers ['PhGKB Gene ID', 'PhGKB Drug ID']
|
79
|
-
end
|
80
|
-
|
81
|
-
process_tsv :gene_disease, 'relationships',
|
82
|
-
:select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
|
83
|
-
:key_field => 1,
|
84
|
-
:fields => 3,
|
85
|
-
:merge => true,
|
86
|
-
:header_hash => "",
|
87
|
-
:fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
|
88
|
-
:keep_empty => true do
|
89
|
-
|
90
|
-
headers ['PhGKB Gene ID', 'PhGKB Disease ID']
|
91
|
-
end
|
92
|
-
|
93
|
-
process_tsv :variants, 'variants',
|
94
|
-
:key_field => 1,
|
95
|
-
:fields => [3,7,8,9,10,4,6,5],
|
96
|
-
:header_hash => "",
|
97
|
-
:merge => true,
|
98
|
-
:fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
|
99
|
-
:keep_empty => true do
|
100
|
-
|
101
|
-
headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
|
102
|
-
end
|
103
|
-
|
104
|
-
file :pathways => 'source/pathways' do |t|
|
105
|
-
File.open(t.name, 'w') do |f|
|
106
|
-
f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
|
107
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
108
|
-
case
|
109
|
-
when line =~ /(PA\d+): (.*) - \((.*)\)/
|
110
|
-
f.puts [$1,$2,$3] * "\t"
|
111
|
-
when line =~ /(PA\d+): (.*)/
|
112
|
-
f.puts [$1,$2,""] * "\t"
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
file :gene_pathway => 'source/pathways' do |t|
|
119
|
-
pathways = {}
|
120
|
-
last_pathway = nil
|
121
|
-
|
122
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
123
|
-
if line =~ /(P.*):(.*)/
|
124
|
-
last_pathway = $1
|
125
|
-
pathways[last_pathway] = {:name => $2}
|
126
|
-
else
|
127
|
-
type, code, name = line.split(/\t/)
|
128
|
-
next unless type =='Gene'
|
129
|
-
pathways[last_pathway][:genes] ||= []
|
130
|
-
pathways[last_pathway][:genes] << name
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
file :gene_pathway => 'source/pathways' do |t|
|
136
|
-
pathways = {}
|
137
|
-
last_pathway = nil
|
138
|
-
|
139
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
140
|
-
if line =~ /(P.*):(.*)/
|
141
|
-
last_pathway = $1
|
142
|
-
pathways[last_pathway] = {:name => $2}
|
143
|
-
else
|
144
|
-
type, code, name = line.split(/\t/)
|
145
|
-
next unless type =='Gene'
|
146
|
-
pathways[last_pathway][:genes] ||= []
|
147
|
-
pathways[last_pathway][:genes] << name
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
File.open(t.name, 'w') do |f|
|
152
|
-
f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
|
153
|
-
pathways.each do |pathway, info|
|
154
|
-
next if info[:genes].nil?
|
155
|
-
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
|
156
|
-
end
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
file :pathway_drugs => 'source/pathways' do |t|
|
161
|
-
pathways = {}
|
162
|
-
last_pathway = nil
|
163
|
-
|
164
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
165
|
-
if line =~ /(P.*):(.*)/
|
166
|
-
last_pathway = $1
|
167
|
-
pathways[last_pathway] = {:name => $2}
|
168
|
-
else
|
169
|
-
type, code, name = line.split(/\t/)
|
170
|
-
next unless type =='Drug'
|
171
|
-
pathways[last_pathway][:drugs] ||= []
|
172
|
-
pathways[last_pathway][:drugs] << code
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
File.open(t.name, 'w') do |f|
|
177
|
-
f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
|
178
|
-
pathways.each do |pathway, info|
|
179
|
-
next if info[:drugs].nil?
|
180
|
-
f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
|
186
|
-
file :disease_pathway => 'source/pathways' do |t|
|
187
|
-
pathways = {}
|
188
|
-
last_pathway = nil
|
189
|
-
|
190
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
191
|
-
if line =~ /(P.*):(.*)/
|
192
|
-
last_pathway = $1
|
193
|
-
pathways[last_pathway] = {:name => $2}
|
194
|
-
else
|
195
|
-
type, code, name = line.split(/\t/)
|
196
|
-
next unless type =='Disease'
|
197
|
-
pathways[last_pathway][:diseases] ||= []
|
198
|
-
pathways[last_pathway][:diseases] << name
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
File.open(t.name, 'w') do |f|
|
203
|
-
f.puts "#" + %w(ID Name Diseases) * "\t"
|
204
|
-
pathways.each do |pathway, info|
|
205
|
-
next if info[:diseases].nil?
|
206
|
-
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:diseases] * "|"}"
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|
210
|
-
|
211
|
-
add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
|
data/share/install/Pina/Rakefile
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
|
4
|
-
|
5
|
-
process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
|
6
|
-
:key => 0,
|
7
|
-
:fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
|
8
|
-
:fields => [1,6,8],
|
9
|
-
:header_hash => "#",
|
10
|
-
:merge => true,
|
11
|
-
:keep_empty => true do
|
12
|
-
|
13
|
-
headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
|
14
|
-
end
|
15
|
-
|
16
|
-
|
data/share/install/STITCH/Rakefile
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
|
4
|
-
"chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
|
5
|
-
|
6
|
-
process_tsv :protein_chemical, 'protein_chemicals',
|
7
|
-
:key => 1,
|
8
|
-
:grep => "9606\.",
|
9
|
-
:fix => lambda{|l| l.sub(/9606\./,'')},
|
10
|
-
:keep_empty => true do
|
11
|
-
|
12
|
-
headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
|
13
|
-
end
|
14
|
-
|
15
|
-
$grep_re = []
|
16
|
-
process_tsv :chemicals, 'chemicals',
|
17
|
-
:grep => $grep_re,
|
18
|
-
:key => 0 do
|
19
|
-
|
20
|
-
Rake::Task['protein_chemical'].invoke
|
21
|
-
|
22
|
-
Log.debug "Getting chemicals"
|
23
|
-
chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
|
24
|
-
Log.debug "Getting chemicals [done]"
|
25
|
-
|
26
|
-
$grep_re.replace chemicals
|
27
|
-
|
28
|
-
headers ['STITCH Chemical ID', 'Name', 'Source']
|
29
|
-
end
|
30
|
-
|
data/share/install/STRING/Rakefile
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
2
|
-
|
3
|
-
define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
|
4
|
-
|
5
|
-
process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
|
6
|
-
headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
|
7
|
-
end
|
8
|
-
|