rbbt-phgx 2.1.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- metadata +18 -28
- data/lib/rbbt/sources/kegg.rb +0 -127
- data/lib/rbbt/sources/matador.rb +0 -9
- data/lib/rbbt/sources/pharmagkb.rb +0 -9
- data/lib/rbbt/sources/pina.rb +0 -35
- data/lib/rbbt/sources/stitch.rb +0 -9
- data/lib/rbbt/sources/string.rb +0 -27
- data/share/install/KEGG/Rakefile +0 -114
- data/share/install/PharmaGKB/Rakefile +0 -211
- data/share/install/Pina/Rakefile +0 -16
- data/share/install/STITCH/Rakefile +0 -30
- data/share/install/STRING/Rakefile +0 -8
checksums.yaml
CHANGED
|
@@ -1,15 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
|
|
5
|
-
data.tar.gz: !binary |-
|
|
6
|
-
YmM0ZWI0YTU0Y2Q5MmE0YmM4ZmUwN2JiODJjNzRiNDUyMGVhZWU1YQ==
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 36529e9255e02520dc576429de183175c52b2e30
|
|
4
|
+
data.tar.gz: 95a9925f8c9c79c2ea65e1effe28964d3ece20d8
|
|
7
5
|
SHA512:
|
|
8
|
-
metadata.gz:
|
|
9
|
-
|
|
10
|
-
OWU2MTdlMGJhNTFjNzlhOTYyNjA1ZDFkNjM4ZTczZTAyNmFkNzMxZWFjZjBk
|
|
11
|
-
YmEwM2U1MmViNzU2OGE4ZGI3Njk3NTYyMmZjMzVjZDA5MjJkODk=
|
|
12
|
-
data.tar.gz: !binary |-
|
|
13
|
-
YWU2NGJiMmE4MmVkNjYxZmJkYmVjODc0OGJhZGYwNTliN2QwMmVhZDNlMDJh
|
|
14
|
-
MjJkMmUxYmViNmY3MGNhYTVlMTZiNmUyMDQxYTRiOGUzYTVhNTI2MWIzMzk2
|
|
15
|
-
NmE4OTg0ZDRjOGYxYjQ1YjE5NTI4Y2M1ZWI5NmVlNDczYjU2MzE=
|
|
6
|
+
metadata.gz: 860587fb353eb97f5bb7e6515e030e9590d258533c1e0698896f416b7bab70db5214581c7d1d2cb0d842d91b4e3100d4c47a0faa57c0d42356b39f386d5f1745
|
|
7
|
+
data.tar.gz: 5c1f9ca7ce18f225aa5a8c013067f3e665a26c97284922e7138b22b29ef7d913b92f1a871c9b38a7c73eef4bbf0a814260d60e8ebae63b3d9d984b016c731780
|
metadata
CHANGED
|
@@ -1,30 +1,31 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-phgx
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.2
|
|
4
|
+
version: 3.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2014-02-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rbbt-util
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
19
|
version: 4.0.0
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: 4.0.0
|
|
27
|
-
description: Pharmaco-genomics related data sources
|
|
27
|
+
description: Pharmaco-genomics related data sources. Deprecated. Relevant files moved
|
|
28
|
+
to rbbt-sources for the time been
|
|
28
29
|
email: miguel.vazquez@fdi.ucm.es
|
|
29
30
|
executables: []
|
|
30
31
|
extensions: []
|
|
@@ -43,24 +44,13 @@ files:
|
|
|
43
44
|
- lib/rbbt/sources/biogrid.rb
|
|
44
45
|
- lib/rbbt/sources/cancer.rb
|
|
45
46
|
- lib/rbbt/sources/dbsnp.rb
|
|
46
|
-
- lib/rbbt/sources/kegg.rb
|
|
47
|
-
- lib/rbbt/sources/matador.rb
|
|
48
|
-
- lib/rbbt/sources/pharmagkb.rb
|
|
49
|
-
- lib/rbbt/sources/pina.rb
|
|
50
|
-
- lib/rbbt/sources/stitch.rb
|
|
51
|
-
- lib/rbbt/sources/string.rb
|
|
52
47
|
- share/Cancer/anais_annotations
|
|
53
48
|
- share/Cancer/anais_interactions
|
|
54
49
|
- share/Cancer/cancer_genes.tsv
|
|
55
50
|
- share/install/Biogrid/Rakefile
|
|
56
51
|
- share/install/DBSNP/Rakefile
|
|
57
|
-
- share/install/KEGG/Rakefile
|
|
58
52
|
- share/install/Matador/Rakefile
|
|
59
53
|
- share/install/NCI/Rakefile
|
|
60
|
-
- share/install/PharmaGKB/Rakefile
|
|
61
|
-
- share/install/Pina/Rakefile
|
|
62
|
-
- share/install/STITCH/Rakefile
|
|
63
|
-
- share/install/STRING/Rakefile
|
|
64
54
|
- share/install/lib/rake_helper.rb
|
|
65
55
|
- test/rbbt/mutation/test_chasm.rb
|
|
66
56
|
- test/rbbt/mutation/test_fireDB.rb
|
|
@@ -86,32 +76,32 @@ require_paths:
|
|
|
86
76
|
- lib
|
|
87
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
88
78
|
requirements:
|
|
89
|
-
- -
|
|
79
|
+
- - ">="
|
|
90
80
|
- !ruby/object:Gem::Version
|
|
91
81
|
version: '0'
|
|
92
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
83
|
requirements:
|
|
94
|
-
- -
|
|
84
|
+
- - ">="
|
|
95
85
|
- !ruby/object:Gem::Version
|
|
96
86
|
version: '0'
|
|
97
87
|
requirements: []
|
|
98
88
|
rubyforge_project:
|
|
99
|
-
rubygems_version: 2.2.
|
|
89
|
+
rubygems_version: 2.2.1
|
|
100
90
|
signing_key:
|
|
101
91
|
specification_version: 4
|
|
102
|
-
summary: Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
|
92
|
+
summary: DEPRECATED -- Pharmaco-genomics for the Ruby Bioinformatics Toolkit (rbbt)
|
|
103
93
|
test_files:
|
|
104
|
-
- test/rbbt/sources/test_matador.rb
|
|
105
|
-
- test/rbbt/sources/test_cancer.rb
|
|
106
|
-
- test/rbbt/sources/test_stitch.rb
|
|
107
|
-
- test/rbbt/sources/test_pharmagkb.rb
|
|
108
|
-
- test/rbbt/sources/test_kegg.rb
|
|
109
|
-
- test/rbbt/mutation/test_oncodriveFM.rb
|
|
110
|
-
- test/rbbt/mutation/test_polyphen.rb
|
|
111
94
|
- test/rbbt/mutation/test_chasm.rb
|
|
112
|
-
- test/rbbt/mutation/test_sift.rb
|
|
113
95
|
- test/rbbt/mutation/test_transFIC.rb
|
|
114
96
|
- test/rbbt/mutation/test_snps_and_go.rb
|
|
97
|
+
- test/rbbt/mutation/test_sift.rb
|
|
98
|
+
- test/rbbt/mutation/test_oncodriveFM.rb
|
|
99
|
+
- test/rbbt/mutation/test_polyphen.rb
|
|
115
100
|
- test/rbbt/mutation/test_mutation_assessor.rb
|
|
116
101
|
- test/rbbt/mutation/test_fireDB.rb
|
|
102
|
+
- test/rbbt/sources/test_pharmagkb.rb
|
|
103
|
+
- test/rbbt/sources/test_kegg.rb
|
|
104
|
+
- test/rbbt/sources/test_matador.rb
|
|
105
|
+
- test/rbbt/sources/test_cancer.rb
|
|
106
|
+
- test/rbbt/sources/test_stitch.rb
|
|
117
107
|
- test/test_helper.rb
|
data/lib/rbbt/sources/kegg.rb
DELETED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
require 'rbbt'
|
|
2
|
-
require 'rbbt/resource'
|
|
3
|
-
|
|
4
|
-
module KEGG
|
|
5
|
-
extend Resource
|
|
6
|
-
self.pkgdir = "phgx"
|
|
7
|
-
self.subdir = "share/kegg"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)
|
|
11
|
-
|
|
12
|
-
def self.names
|
|
13
|
-
@@names ||= KEGG.pathways.tsv :fields => ["Pathway Name"], :persist => true, :type => :single, :unnamed => true
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def self.descriptions
|
|
17
|
-
@@descriptions ||= KEGG.pathways.tsv(:fields => ["Pathway Description"], :persist => true, :type => :single, :unnamed => true)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def self.index2genes
|
|
22
|
-
@@index2genes ||= KEGG.gene_pathway.tsv(:key_field => "KEGG Pathway ID", :fields => ["KEGG Gene ID"], :persist => true, :type => :flat, :merge => true)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def self.index2ens
|
|
26
|
-
@@index2ens ||= KEGG.identifiers.index(:persist => true)
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def self.index2kegg
|
|
30
|
-
@@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def self.id2name(id)
|
|
34
|
-
names[id]
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def self.name2id(name)
|
|
38
|
-
names.select{|id,n| n.downcase.index(name.downcase) == 0}.collect{|id,n| id} rescue []
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def self.description(id)
|
|
43
|
-
descriptions[id]
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
if defined? Entity
|
|
48
|
-
|
|
49
|
-
module KeggPathway
|
|
50
|
-
extend Entity
|
|
51
|
-
self.format = "KEGG Pathway ID"
|
|
52
|
-
|
|
53
|
-
self.annotation :organism
|
|
54
|
-
|
|
55
|
-
def self.filter(query, field = nil, options = nil, entity = nil)
|
|
56
|
-
return true if query == entity
|
|
57
|
-
|
|
58
|
-
return true if KeggPathway.setup(entity.dup, options.merge(:format => field)).name.index query
|
|
59
|
-
|
|
60
|
-
false
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
property :name => :single2array do
|
|
64
|
-
return nil if self.nil?
|
|
65
|
-
name = KEGG.id2name(self)
|
|
66
|
-
name.sub(/ - Homo.*/,'') unless name.nil?
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
property :description => :single2array do
|
|
70
|
-
KEGG.description(self)
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
property :genes => :array2single do |*args|
|
|
74
|
-
organism = args.first || self.organism
|
|
75
|
-
KEGG.index2genes.values_at(*self).
|
|
76
|
-
each{|gene| gene.organism = organism if gene.respond_to? :organism }
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
if defined? Gene and Entity === Gene
|
|
81
|
-
module Gene
|
|
82
|
-
self.format = "KEGG Gene ID"
|
|
83
|
-
|
|
84
|
-
def to_kegg
|
|
85
|
-
return self if format == "KEGG Gene ID"
|
|
86
|
-
if Array === self
|
|
87
|
-
Gene.setup(KEGG.index2kegg.values_at(*to("Ensembl Gene ID")), "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
88
|
-
else
|
|
89
|
-
Gene.setup(KEGG.index2kegg[to("Ensembl Gene ID")], "KEGG Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def from_kegg
|
|
94
|
-
return self unless format == "KEGG Gene ID"
|
|
95
|
-
if Array === self
|
|
96
|
-
Gene.setup(KEGG.index2ens.values_at(*self), "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
97
|
-
else
|
|
98
|
-
Gene.setup(KEGG.index2ens[self], "Ensembl Gene ID", organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def self.gene_kegg_pathway_index
|
|
103
|
-
@@gene_kegg_pathway_index ||=
|
|
104
|
-
KEGG.gene_pathway.tsv(:persist => true, :key_field => "KEGG Gene ID", :fields => ["KEGG Pathway ID"], :type => :flat, :merge => true)
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
property :to => :array2single do |new_format|
|
|
108
|
-
case
|
|
109
|
-
when format == new_format
|
|
110
|
-
self
|
|
111
|
-
when format == "KEGG Gene ID"
|
|
112
|
-
ensembl = from_kegg.clean_annotations
|
|
113
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format).exec.chunked_values_at(ensembl), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
114
|
-
when new_format == "KEGG Gene ID"
|
|
115
|
-
to_kegg
|
|
116
|
-
else
|
|
117
|
-
Gene.setup(Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.chunked_values_at(self), new_format, organism).tap{|o| o.extend AnnotatedArray if AnnotatedArray === self }
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
property :kegg_pathways => :array2single do
|
|
122
|
-
@kegg_pathways ||= Gene.gene_kegg_pathway_index.values_at(*self.to_kegg).
|
|
123
|
-
each{|pth| pth.organism = organism if pth.respond_to? :organism }.tap{|o| KeggPathway.setup(o, organism)}
|
|
124
|
-
end
|
|
125
|
-
end
|
|
126
|
-
end
|
|
127
|
-
end
|
data/lib/rbbt/sources/matador.rb
DELETED
data/lib/rbbt/sources/pharmagkb.rb
DELETED
data/lib/rbbt/sources/pina.rb
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
require 'phgx'
|
|
2
|
-
|
|
3
|
-
module Pina
|
|
4
|
-
extend Resource
|
|
5
|
-
self.pkgdir = "phgx"
|
|
6
|
-
self.subdir = "share/pina"
|
|
7
|
-
|
|
8
|
-
Pina.claim Pina.root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
if defined? Entity and defined? Gene and Entity === Gene
|
|
12
|
-
require 'rbbt/entity/gene'
|
|
13
|
-
require 'rbbt/entity/interactor'
|
|
14
|
-
require 'rbbt/sources/PSI_MI'
|
|
15
|
-
|
|
16
|
-
module Gene
|
|
17
|
-
property :pina_interactors => :array2single do
|
|
18
|
-
ens2uniprot = Organism.identifiers(organism).tsv :key_field => "Ensembl Gene ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :persist => true, :unnamed => true
|
|
19
|
-
pina = Pina.protein_protein.tsv(:persist => true, :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"], :type => :double, :merge => true, :unnamed => true)
|
|
20
|
-
|
|
21
|
-
int = self.ensembl.collect do |ens|
|
|
22
|
-
uniprot = ens2uniprot[ens]
|
|
23
|
-
list = pina.values_at(*uniprot).compact.collect do |v|
|
|
24
|
-
Misc.zip_fields(v).collect do |o, method, articles|
|
|
25
|
-
Interactor.setup(o, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
|
|
26
|
-
end
|
|
27
|
-
end.flatten.uniq
|
|
28
|
-
Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
Gene.setup(int, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
data/lib/rbbt/sources/stitch.rb
DELETED
data/lib/rbbt/sources/string.rb
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
require 'phgx'
|
|
2
|
-
|
|
3
|
-
module STRING
|
|
4
|
-
extend Resource
|
|
5
|
-
self.pkgdir = "phgx"
|
|
6
|
-
self.subdir = "share/string"
|
|
7
|
-
|
|
8
|
-
STRING.claim STRING.root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
if defined? Entity and defined? Gene and Entity === Gene
|
|
12
|
-
module Gene
|
|
13
|
-
property :string_interactors => :array2single do |*args|
|
|
14
|
-
threshold = args.first || 800
|
|
15
|
-
string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)
|
|
16
|
-
all = self.ensembl.collect do |gene|
|
|
17
|
-
interactors = gene.proteins.collect{|protein| Misc.zip_fields((string[protein] || [[],[]])).select{|i, score| score.to_i > threshold}.collect{|ints,s| ints}}.compact.flatten.uniq
|
|
18
|
-
Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
all.compact.first.annotate all if Annotated === all.compact.first
|
|
22
|
-
|
|
23
|
-
all
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
data/share/install/KEGG/Rakefile
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
|
-
|
|
3
|
-
define_source_tasks "h.sapiens" => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
|
|
4
|
-
"hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
|
|
5
|
-
"drugs" => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
|
|
6
|
-
"pathways" => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
file :identifiers => 'source/h.sapiens' do |t|
|
|
10
|
-
pairs = {}
|
|
11
|
-
entry = nil
|
|
12
|
-
Open.read(t.prerequisites.first).each do |line|
|
|
13
|
-
if line =~ /^ENTRY\s+(\d+)/
|
|
14
|
-
entry = $1
|
|
15
|
-
next
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
if line =~ /Ensembl: (ENSG\d+)/
|
|
19
|
-
pairs[entry] = $1
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
Open.write(t.name, ['#Ensembl Gene ID','KEGG Gene ID'] * "\t" + "\n" + pairs.collect{|entry, ens| [ens, "hsa:" + entry] * "\t"} * "\n")
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
file :gene_drug => 'source/drugs' do |t|
|
|
27
|
-
pairs = {}
|
|
28
|
-
drug = nil
|
|
29
|
-
Open.read(t.prerequisites.first).
|
|
30
|
-
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|TARGET/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
|
31
|
-
if line =~ /^ENTRY\s+(\w+)/
|
|
32
|
-
drug = $1
|
|
33
|
-
next
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
if line =~ /TARGET.*?\[HSA:(.*?)\]/
|
|
37
|
-
genes = $1.split(/\s/)
|
|
38
|
-
genes.each do |gene|
|
|
39
|
-
pairs[gene] ||= []
|
|
40
|
-
pairs[gene] << drug
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
Open.write(t.name, ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t" + "\n" + pairs.collect{|gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t"} * "\n")
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
file :drugs => 'source/drugs' do |t|
|
|
49
|
-
info = {}
|
|
50
|
-
drug = nil
|
|
51
|
-
Open.read(t.prerequisites.first).
|
|
52
|
-
scan(/^[A-Z].*?(?:^[A-Z])/sm).select{|line| line =~ /^ENTRY|NAME|DBLINKS/}.collect{|line| line.sub(/\s+/,' ')}.each do |line|
|
|
53
|
-
if line =~ /^ENTRY\s+(\w+)/
|
|
54
|
-
drug = $1
|
|
55
|
-
next
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
if line =~ /^NAME(.*)/
|
|
59
|
-
names = $1.split(/;/)
|
|
60
|
-
names.each do |name|
|
|
61
|
-
info[drug] ||= [[],[]]
|
|
62
|
-
info[drug][0] << name.chomp.strip
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
if line =~ /^DBLINKS(.*)/
|
|
67
|
-
$1.match(/PubChem: (\d*)/)
|
|
68
|
-
pubchem = $1
|
|
69
|
-
next unless pubchem
|
|
70
|
-
info[drug] ||= [[],[]]
|
|
71
|
-
info[drug][1] << pubchem.chomp.strip
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
Open.write(t.name, ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t" + "\n" + info.collect{|drug, info| [drug, info.collect{|v| v * "|"} ].flatten * "\t"} * "\n")
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
file :pathways => 'source/pathways' do |t|
|
|
79
|
-
descs = {}
|
|
80
|
-
names = {}
|
|
81
|
-
klass = {}
|
|
82
|
-
pathway = nil
|
|
83
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
84
|
-
if line =~ /ENTRY\s+(\w+)/
|
|
85
|
-
pathway = $1.strip
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
if line =~ /NAME (.*)/
|
|
89
|
-
names[pathway] = $1.strip
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
if line =~ /DESCRIPTION (.*)/
|
|
93
|
-
descs[pathway] = $1.strip
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
if line =~ /CLASS (.*)/
|
|
97
|
-
klass[pathway] = $1.strip
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
Open.write(t.name, "#: :type=:list\n" + ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t" + "\n" + names.keys.collect{|pathway| [pathway, names[pathway], descs[pathway], klass[pathway]] * "\t"} * "\n")
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
process_tsv :gene_pathway, 'hsa_gene_map.tab',
|
|
105
|
-
:sep2 => ' ' do
|
|
106
|
-
headers ['KEGG Gene ID', 'KEGG Pathway ID']
|
|
107
|
-
data do |gene, pathway|
|
|
108
|
-
"hsa:#{ gene }\t#{pathway.flatten.collect{|name| "hsa" + name} * "|"}"
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
|
|
113
|
-
|
|
114
|
-
|
data/share/install/PharmaGKB/Rakefile
DELETED
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
|
-
|
|
3
|
-
define_source_tasks "genes" => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
|
|
4
|
-
"drugs" => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
|
|
5
|
-
"diseases" => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
|
|
6
|
-
"relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
|
|
7
|
-
"variants" => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
|
|
8
|
-
"pathways" => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
process_tsv :diseases, 'diseases',
|
|
12
|
-
:header_hash => "",
|
|
13
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
|
14
|
-
headers ['PhGKB Disease ID']
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
process_tsv :identifiers, 'genes',
|
|
18
|
-
:header_hash => "",
|
|
19
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
|
20
|
-
headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
process_tsv :drugs, 'drugs',
|
|
24
|
-
:header_hash => "",
|
|
25
|
-
:fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
|
|
26
|
-
:fix => proc{|l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1')} do
|
|
27
|
-
headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
process_tsv :relationships, 'relationships',
|
|
31
|
-
:header_hash => "",
|
|
32
|
-
:merge => true,
|
|
33
|
-
:fix => proc{|l|
|
|
34
|
-
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
|
35
|
-
parts = l.split("\t")
|
|
36
|
-
rels = parts.pop
|
|
37
|
-
parts = [parts.values_at(0, 2) * ":"]
|
|
38
|
-
pmids = []
|
|
39
|
-
pathways = []
|
|
40
|
-
rsids = []
|
|
41
|
-
rels.split(',').each do |r|
|
|
42
|
-
case
|
|
43
|
-
when r =~ /PMID:(.*)/
|
|
44
|
-
pmids << $1
|
|
45
|
-
when r =~ /Pathway:(.*)/
|
|
46
|
-
pathways << $1
|
|
47
|
-
when r =~ /RSID:(.*)/
|
|
48
|
-
rsids << $1
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
parts << pmids * "|"
|
|
53
|
-
parts << pathways * "|"
|
|
54
|
-
parts << rsids * "|"
|
|
55
|
-
|
|
56
|
-
parts * "\t"
|
|
57
|
-
},
|
|
58
|
-
:keep_empty => true do
|
|
59
|
-
|
|
60
|
-
headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
process_tsv :gene_drug, 'relationships',
|
|
65
|
-
:select => proc{|l| l =~ /^Gene:/ && l =~ /Drug:/},
|
|
66
|
-
:header_hash => "",
|
|
67
|
-
:merge => true,
|
|
68
|
-
:fix => proc{|l|
|
|
69
|
-
l.gsub!(/Gene:|Drug:|Disease:/,'')
|
|
70
|
-
parts = l.split("\t")
|
|
71
|
-
rels = parts.pop
|
|
72
|
-
parts = parts.values_at 0, 2
|
|
73
|
-
|
|
74
|
-
parts * "\t"
|
|
75
|
-
},
|
|
76
|
-
:keep_empty => true do
|
|
77
|
-
|
|
78
|
-
headers ['PhGKB Gene ID', 'PhGKB Drug ID']
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
process_tsv :gene_disease, 'relationships',
|
|
82
|
-
:select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
|
|
83
|
-
:key_field => 1,
|
|
84
|
-
:fields => 3,
|
|
85
|
-
:merge => true,
|
|
86
|
-
:header_hash => "",
|
|
87
|
-
:fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
|
|
88
|
-
:keep_empty => true do
|
|
89
|
-
|
|
90
|
-
headers ['PhGKB Gene ID', 'PhGKB Disease ID']
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
process_tsv :variants, 'variants',
|
|
94
|
-
:key_field => 1,
|
|
95
|
-
:fields => [3,7,8,9,10,4,6,5],
|
|
96
|
-
:header_hash => "",
|
|
97
|
-
:merge => true,
|
|
98
|
-
:fix => proc{|l| l.gsub(/Gene:|Drug:|Disease/,'')},
|
|
99
|
-
:keep_empty => true do
|
|
100
|
-
|
|
101
|
-
headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
file :pathways => 'source/pathways' do |t|
|
|
105
|
-
File.open(t.name, 'w') do |f|
|
|
106
|
-
f.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"
|
|
107
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
108
|
-
case
|
|
109
|
-
when line =~ /(PA\d+): (.*) - \((.*)\)/
|
|
110
|
-
f.puts [$1,$2,$3] * "\t"
|
|
111
|
-
when line =~ /(PA\d+): (.*)/
|
|
112
|
-
f.puts [$1,$2,""] * "\t"
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
file :gene_pathway => 'source/pathways' do |t|
|
|
119
|
-
pathways = {}
|
|
120
|
-
last_pathway = nil
|
|
121
|
-
|
|
122
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
123
|
-
if line =~ /(P.*):(.*)/
|
|
124
|
-
last_pathway = $1
|
|
125
|
-
pathways[last_pathway] = {:name => $2}
|
|
126
|
-
else
|
|
127
|
-
type, code, name = line.split(/\t/)
|
|
128
|
-
next unless type =='Gene'
|
|
129
|
-
pathways[last_pathway][:genes] ||= []
|
|
130
|
-
pathways[last_pathway][:genes] << name
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
file :gene_pathway => 'source/pathways' do |t|
|
|
136
|
-
pathways = {}
|
|
137
|
-
last_pathway = nil
|
|
138
|
-
|
|
139
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
140
|
-
if line =~ /(P.*):(.*)/
|
|
141
|
-
last_pathway = $1
|
|
142
|
-
pathways[last_pathway] = {:name => $2}
|
|
143
|
-
else
|
|
144
|
-
type, code, name = line.split(/\t/)
|
|
145
|
-
next unless type =='Gene'
|
|
146
|
-
pathways[last_pathway][:genes] ||= []
|
|
147
|
-
pathways[last_pathway][:genes] << name
|
|
148
|
-
end
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
File.open(t.name, 'w') do |f|
|
|
152
|
-
f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
|
|
153
|
-
pathways.each do |pathway, info|
|
|
154
|
-
next if info[:genes].nil?
|
|
155
|
-
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
|
|
156
|
-
end
|
|
157
|
-
end
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
file :pathway_drugs => 'source/pathways' do |t|
|
|
161
|
-
pathways = {}
|
|
162
|
-
last_pathway = nil
|
|
163
|
-
|
|
164
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
165
|
-
if line =~ /(P.*):(.*)/
|
|
166
|
-
last_pathway = $1
|
|
167
|
-
pathways[last_pathway] = {:name => $2}
|
|
168
|
-
else
|
|
169
|
-
type, code, name = line.split(/\t/)
|
|
170
|
-
next unless type =='Drug'
|
|
171
|
-
pathways[last_pathway][:drugs] ||= []
|
|
172
|
-
pathways[last_pathway][:drugs] << code
|
|
173
|
-
end
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
File.open(t.name, 'w') do |f|
|
|
177
|
-
f.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
|
|
178
|
-
pathways.each do |pathway, info|
|
|
179
|
-
next if info[:drugs].nil?
|
|
180
|
-
f.puts "#{ pathway }\t#{info[:drugs] * "|"}"
|
|
181
|
-
end
|
|
182
|
-
end
|
|
183
|
-
end
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
file :disease_pathway => 'source/pathways' do |t|
|
|
187
|
-
pathways = {}
|
|
188
|
-
last_pathway = nil
|
|
189
|
-
|
|
190
|
-
Open.read(t.prerequisites.first).split(/\n/).each do |line|
|
|
191
|
-
if line =~ /(P.*):(.*)/
|
|
192
|
-
last_pathway = $1
|
|
193
|
-
pathways[last_pathway] = {:name => $2}
|
|
194
|
-
else
|
|
195
|
-
type, code, name = line.split(/\t/)
|
|
196
|
-
next unless type =='Disease'
|
|
197
|
-
pathways[last_pathway][:diseases] ||= []
|
|
198
|
-
pathways[last_pathway][:diseases] << name
|
|
199
|
-
end
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
File.open(t.name, 'w') do |f|
|
|
203
|
-
f.puts "#" + %w(ID Name Diseases) * "\t"
|
|
204
|
-
pathways.each do |pathway, info|
|
|
205
|
-
next if info[:diseases].nil?
|
|
206
|
-
f.puts "#{ pathway }\t#{info[:name]}\t#{info[:diseases] * "|"}"
|
|
207
|
-
end
|
|
208
|
-
end
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
add_to_defaults [:gene_pathway, :drug_pathway, :disease_pathway]
|
data/share/install/Pina/Rakefile
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
|
-
|
|
3
|
-
define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
|
|
4
|
-
|
|
5
|
-
process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
|
|
6
|
-
:key => 0,
|
|
7
|
-
:fix => lambda{|l| l.gsub("uniprotkb:", '').gsub("(gene name)",'').gsub("pubmed:",'').gsub("|", ';;').gsub(/\([^)]+\)/,'')},
|
|
8
|
-
:fields => [1,6,8],
|
|
9
|
-
:header_hash => "#",
|
|
10
|
-
:merge => true,
|
|
11
|
-
:keep_empty => true do
|
|
12
|
-
|
|
13
|
-
headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
|
data/share/install/STITCH/Rakefile
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
|
-
|
|
3
|
-
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
|
|
4
|
-
"chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
|
|
5
|
-
|
|
6
|
-
process_tsv :protein_chemical, 'protein_chemicals',
|
|
7
|
-
:key => 1,
|
|
8
|
-
:grep => "9606\.",
|
|
9
|
-
:fix => lambda{|l| l.sub(/9606\./,'')},
|
|
10
|
-
:keep_empty => true do
|
|
11
|
-
|
|
12
|
-
headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
$grep_re = []
|
|
16
|
-
process_tsv :chemicals, 'chemicals',
|
|
17
|
-
:grep => $grep_re,
|
|
18
|
-
:key => 0 do
|
|
19
|
-
|
|
20
|
-
Rake::Task['protein_chemical'].invoke
|
|
21
|
-
|
|
22
|
-
Log.debug "Getting chemicals"
|
|
23
|
-
chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
|
|
24
|
-
Log.debug "Getting chemicals [done]"
|
|
25
|
-
|
|
26
|
-
$grep_re.replace chemicals
|
|
27
|
-
|
|
28
|
-
headers ['STITCH Chemical ID', 'Name', 'Source']
|
|
29
|
-
end
|
|
30
|
-
|
data/share/install/STRING/Rakefile
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../lib/rake_helper')
|
|
2
|
-
|
|
3
|
-
define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
|
|
4
|
-
|
|
5
|
-
process_tsv :protein_protein, 'protein_protein', :grep => '9606\.ENSP', :fix => lambda{|l| l.gsub(/9606\./,'')}, :merge => true, :sep => "\s" do
|
|
6
|
-
headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
|
|
7
|
-
end
|
|
8
|
-
|