rbbt-sources 2.1.7 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
4
- data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
3
+ metadata.gz: 00cd4a9602b9ea2637a620b30cd3d48a6d63a9fe
4
+ data.tar.gz: c282f8c86de5148343e5a83ea524cdc09435b9fb
5
5
  SHA512:
6
- metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
7
- data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
6
+ metadata.gz: 701a67455ca18d9c705e2c409628cd5463f7449d2ee40ba4d26cce6f203018db21b9c6ee6f233cf8d80e44e28d3ffcfa08e474678b538b8db7cb80c44e5eac5a
7
+ data.tar.gz: 4bbcf6f222c01c5f3314617ed7c2458b3cebb9d8b3293ac631305ea2c610c935792fe0e5d6a7402f041aab4304e5586a7989f1e2a097b5dc620f0cb7a208250c
@@ -0,0 +1,127 @@
1
+ require 'rbbt'
2
+ require 'rbbt/resource'
3
+
4
# Resource module exposing the KEGG tables (pathways, gene/pathway map and
# identifier indices) together with a few memoized lookup helpers.
module KEGG
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/kegg"

  # Register the bundled Rakefile as the :rake producer for the resource root.
  claim root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)

  # Pathway ID => pathway name (single-value TSV, memoized).
  def self.names
    @@names ||= KEGG.pathways.tsv(
      :fields => ["Pathway Name"],
      :persist => true,
      :type => :single,
      :unnamed => true
    )
  end

  # Pathway ID => pathway description (single-value TSV, memoized).
  def self.descriptions
    @@descriptions ||= KEGG.pathways.tsv(
      :fields => ["Pathway Description"],
      :persist => true,
      :type => :single,
      :unnamed => true
    )
  end

  # Pathway ID => list of KEGG gene IDs (flat TSV, memoized).
  def self.index2genes
    @@index2genes ||= KEGG.gene_pathway.tsv(
      :key_field => "KEGG Pathway ID",
      :fields => ["KEGG Gene ID"],
      :persist => true,
      :type => :flat,
      :merge => true
    )
  end

  # Index over the identifiers table using its default target (memoized).
  def self.index2ens
    @@index2ens ||= KEGG.identifiers.index(:persist => true)
  end

  # Index over the identifiers table targeting "KEGG Gene ID" (memoized).
  def self.index2kegg
    @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
  end

  # Name of the pathway with the given ID.
  def self.id2name(id)
    names[id]
  end

  # IDs of the pathways whose name starts with +name+ (case-insensitive).
  # Any error raised during the lookup yields an empty list.
  def self.name2id(name)
    prefix = name.downcase
    matching = names.select { |_id, label| label.downcase.start_with?(prefix) }
    matching.collect { |id, _label| id }
  rescue
    []
  end

  # Description of the pathway with the given ID.
  def self.description(id)
    descriptions[id]
  end
end
46
+
47
+ if defined? Entity
48
+
49
# Entity module for KEGG pathway identifiers ("KEGG Pathway ID" format),
# annotated with an organism.
module KeggPathway
  extend Entity
  self.format = "KEGG Pathway ID"

  self.annotation :organism

  # Tells whether +entity+ matches +query+: either they are equal, or the
  # pathway name of +entity+ contains +query+.
  #
  # query   - value to look for.
  # field   - passed as the :format option when setting up the entity.
  # options - extra setup options; may be nil (the default).
  # entity  - pathway identifier to test; nil never matches a non-nil query.
  #
  # Returns true or false.
  def self.filter(query, field = nil, options = nil, entity = nil)
    return true if query == entity
    return false if entity.nil?

    # options defaults to nil, so it cannot be merged directly.
    setup_options = (options || {}).merge(:format => field)
    name = KeggPathway.setup(entity.dup, setup_options).name
    # Pathways unknown to KEGG.names have no name to search in.
    return false if name.nil?

    name.index(query) ? true : false
  end

  # Pathway name with any " - Homo..." suffix removed; nil when unknown.
  property :name => :single2array do
    return nil if self.nil?
    name = KEGG.id2name(self)
    name.sub(/ - Homo.*/, '') unless name.nil?
  end

  # Pathway description as stored in KEGG.descriptions.
  property :description => :single2array do
    KEGG.description(self)
  end

  # Gene lists for each pathway, taken from KEGG.index2genes. The organism
  # (first argument, or the entity's own) is assigned to every value that
  # accepts one.
  property :genes => :array2single do |*args|
    organism = args.first || self.organism
    KEGG.index2genes.values_at(*self).
      each { |gene| gene.organism = organism if gene.respond_to? :organism }
  end
end
79
+
80
+ if defined? Gene and Entity === Gene
81
# KEGG-related extensions for the Gene entity: conversion to and from
# "KEGG Gene ID" and lookup of the pathways each gene takes part in.
module Gene
  self.format = "KEGG Gene ID"

  # Returns the gene(s) in "KEGG Gene ID" format, going through
  # "Ensembl Gene ID" and KEGG.index2kegg. Already-KEGG entities are
  # returned untouched.
  def to_kegg
    return self if format == "KEGG Gene ID"

    ensembl = to("Ensembl Gene ID")
    index = KEGG.index2kegg
    kegg_ids = Array === self ? index.values_at(*ensembl) : index[ensembl]

    converted = Gene.setup(kegg_ids, "KEGG Gene ID", organism)
    converted.extend AnnotatedArray if AnnotatedArray === self
    converted
  end

  # Returns the gene(s) in "Ensembl Gene ID" format using KEGG.index2ens.
  # Entities that are not in KEGG format are returned untouched.
  def from_kegg
    return self unless format == "KEGG Gene ID"

    index = KEGG.index2ens
    ensembl_ids = Array === self ? index.values_at(*self) : index[self]

    converted = Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
    converted.extend AnnotatedArray if AnnotatedArray === self
    converted
  end

  # KEGG gene ID => list of KEGG pathway IDs (flat TSV, memoized).
  def self.gene_kegg_pathway_index
    @@gene_kegg_pathway_index ||= KEGG.gene_pathway.tsv(
      :persist => true,
      :key_field => "KEGG Gene ID",
      :fields => ["KEGG Pathway ID"],
      :type => :flat,
      :merge => true
    )
  end

  # Translates the genes to +new_format+. KEGG IDs are first mapped to
  # Ensembl through from_kegg; other formats go through the Translation
  # workflow's :tsv_translate job.
  property :to => :array2single do |new_format|
    if format == new_format
      self
    elsif format == "KEGG Gene ID"
      ensembl = from_kegg.clean_annotations
      job = Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format)
      translated = Gene.setup(job.exec.chunked_values_at(ensembl), new_format, organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    elsif new_format == "KEGG Gene ID"
      to_kegg
    else
      job = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format)
      translated = Gene.setup(job.exec.chunked_values_at(self), new_format, organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    end
  end

  # Pathways for each gene, set up as KeggPathway entities (memoized in
  # @kegg_pathways).
  property :kegg_pathways => :array2single do
    @kegg_pathways ||= begin
      pathways = Gene.gene_kegg_pathway_index.values_at(*to_kegg)
      pathways.each { |pth| pth.organism = organism if pth.respond_to? :organism }
      KeggPathway.setup(pathways, organism)
      pathways
    end
  end
end
126
+ end
127
+ end
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
# Resource module for the Matador data (pkgdir "phgx", subdir "share/matador").
module Matador
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/matador"

  # Register the bundled Rakefile as the :rake producer for the resource root.
  claim root, :rake, Rbbt.share.install.Matador.Rakefile.find(:lib)
end
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
# Resource module for the PharmGKB data (pkgdir "phgx", subdir "share/pharmagkb").
module PharmaGKB
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/pharmagkb"

  # Register the bundled Rakefile as the :rake producer for the resource root.
  claim root, :rake, Rbbt.share.install.PharmaGKB.Rakefile.find(:lib)
end
@@ -0,0 +1,35 @@
1
+ require 'phgx'
2
+
3
# Resource module for the PINA interaction data (pkgdir "phgx", subdir "share/pina").
module Pina
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/pina"

  # Register the bundled Rakefile as the :rake producer for the resource root.
  claim root, :rake, Rbbt.share.install.Pina.Rakefile.find(:lib)
end
10
+
11
+ if defined? Entity and defined? Gene and Entity === Gene
12
+ require 'rbbt/entity/gene'
13
+ require 'rbbt/entity/interactor'
14
+ require 'rbbt/sources/PSI_MI'
15
+
16
# PINA extension for the Gene entity.
module Gene
  # For every gene, the interactors listed in Pina.protein_protein for its
  # UniProt accessions. Each interactor is set up with its PSI-MI method
  # terms and supporting PMIDs (both stored ";;"-separated in the table).
  property :pina_interactors => :array2single do
    ens2uniprot = Organism.identifiers(organism).tsv(
      :key_field => "Ensembl Gene ID",
      :fields => ["UniProt/SwissProt Accession"],
      :type => :flat,
      :persist => true,
      :unnamed => true
    )
    pina = Pina.protein_protein.tsv(
      :persist => true,
      :fields => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"],
      :type => :double,
      :merge => true,
      :unnamed => true
    )

    per_gene = ensembl.collect do |ens|
      accessions = ens2uniprot[ens]
      entries = pina.values_at(*accessions).compact

      interactors = entries.collect do |values|
        Misc.zip_fields(values).collect do |partner, method, articles|
          Interactor.setup(partner, PSI_MITerm.setup(method.split(";;")), PMID.setup(articles.split(";;")))
        end
      end

      Gene.setup(interactors.flatten.uniq, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
    end

    Gene.setup(per_gene, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
  end
end
34
+ end
35
+
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
+ module STITCH
4
+ extend Resource
5
+ self.pkgdir = "phgx"
6
+ self.subdir = "share/stitch"
7
+
8
+ STITCH.claim STITCH.root, :rake, Rbbt.share.install.STITCH.Rakefile.find(:lib)
9
+ end
@@ -0,0 +1,27 @@
1
+ require 'phgx'
2
+
3
# Resource module for the STRING interaction data (pkgdir "phgx", subdir "share/string").
module STRING
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/string"

  # Register the bundled Rakefile as the :rake producer for the resource root.
  claim root, :rake, Rbbt.share.install.STRING.Rakefile.find(:lib)
end
10
+
11
+ if defined? Entity and defined? Gene and Entity === Gene
12
# STRING extension for the Gene entity.
module Gene
  # For every gene, the genes whose proteins interact with one of its
  # proteins in STRING.protein_protein with a score above the threshold
  # (first argument, 800 when omitted).
  property :string_interactors => :array2single do |*args|
    threshold = args.first || 800
    string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)

    all = ensembl.collect do |gene|
      partners = gene.proteins.collect do |protein|
        links = Misc.zip_fields(string[protein] || [[], []])
        links.select { |_id, score| score.to_i > threshold }.collect { |id, _score| id }
      end
      interactors = partners.compact.flatten.uniq

      Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
    end

    # Carry the annotations of the first non-nil result over to the list.
    first = all.compact.first
    first.annotate all if Annotated === first

    all
  end
end
26
+ end
27
+
@@ -0,0 +1,114 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Remote KEGG flat files, keyed by source name (the tasks in this Rakefile
# refer to them as 'source/<name>').
kegg_sources = {
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
}

define_source_tasks kegg_sources
7
+
8
+
9
# Builds the identifiers table: one row per KEGG gene entry that declares
# an Ensembl gene, written as "Ensembl Gene ID <tab> hsa:<entry>".
file :identifiers => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # String#each only exists in Ruby 1.8; each_line works on every version.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    # Ignore cross-references that appear before any ENTRY line; a nil
    # entry would make the "hsa:" concatenation below fail.
    pairs[entry] = $1 if entry && line =~ /Ensembl: (ENSG\d+)/
  end

  header = ['#Ensembl Gene ID', 'KEGG Gene ID'] * "\t"
  rows = pairs.collect { |kegg, ens| [ens, "hsa:" + kegg] * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
25
+
26
# Builds the gene_drug table: for every human gene found in a drug's
# TARGET field, the list of KEGG drug IDs that target it.
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  # Split the flat file into fields: each one runs from a line starting
  # with a capital letter up to the next such line. The look-ahead leaves
  # that next capital letter unconsumed; a plain group would swallow it and
  # make the scan skip every other field. The former /s flag is dropped: in
  # Ruby 1.9+ it pins the regexp to Windows-31J instead of affecting ".".
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z])/m)

  fields.each do |field|
    next unless field =~ /^ENTRY|TARGET/
    line = field.sub(/\s+/, ' ')

    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    # NOTE(review): only the first "[HSA:...]" group on the first line of
    # the field is read -- confirm whether later targets should be included.
    next unless line =~ /TARGET.*?\[HSA:(.*?)\]/
    next if drug.nil? # TARGET seen before any ENTRY

    $1.split(/\s/).each do |gene|
      (pairs[gene] ||= []) << drug
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows = pairs.collect { |gene, drugs| ["hsa:" + gene, drugs * "|"] * "\t" }

  Open.write(t.name, header + "\n" + rows * "\n")
end
47
+
48
# Builds the drugs table: KEGG drug ID, its names and its PubChem ids,
# with multiple values joined by "|".
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  # One chunk per field, from a line starting with a capital letter up to
  # the next such line. The look-ahead keeps the next field's first letter;
  # consuming it made the scan skip every other field (e.g. NAME right
  # after ENTRY). The former /s flag only pinned the regexp encoding.
  fields = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?=^[A-Z])/m)

  fields.each do |field|
    next unless field =~ /^ENTRY|NAME|DBLINKS/

    if field =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    next if drug.nil? # field seen before any ENTRY

    # /m lets the capture span continuation lines, so every ";"-separated
    # name is collected rather than just the first line.
    if field =~ /^NAME(.*)/m
      names = $1.split(/;/).collect { |name| name.strip }.reject { |name| name.empty? }
      names.each do |name|
        info[drug] ||= [[], []]
        info[drug][0] << name
      end
    end

    if field =~ /^DBLINKS/
      # Search the whole field: the PubChem link is usually not on the
      # first DBLINKS line. \d+ avoids recording an empty id.
      pubchem = field[/PubChem: (\d+)/, 1]
      next unless pubchem
      info[drug] ||= [[], []]
      info[drug][1] << pubchem
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  rows = info.collect do |id, values|
    [id, values.collect { |v| v * "|" }].flatten * "\t"
  end

  Open.write(t.name, header + "\n" + rows * "\n")
end
77
+
78
# Builds the pathways table (type :list) with the name, description and
# class found for every pathway entry in the KEGG pathway flat file.
file :pathways => 'source/pathways' do |t|
  names, descs, klass = {}, {}, {}
  pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    # Each pattern is checked independently; values are stored under the
    # most recently seen ENTRY.
    pathway = $1.strip if line =~ /ENTRY\s+(\w+)/
    names[pathway] = $1.strip if line =~ /NAME (.*)/
    descs[pathway] = $1.strip if line =~ /DESCRIPTION (.*)/
    klass[pathway] = $1.strip if line =~ /CLASS (.*)/
  end

  header = ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t"
  rows = names.keys.collect do |id|
    [id, names[id], descs[id], klass[id]] * "\t"
  end

  Open.write(t.name, "#: :type=:list\n" + header + "\n" + rows * "\n")
end
103
+
104
# Gene => pathways table from hsa_gene_map.tab. Gene ids get the "hsa:"
# prefix and pathway ids the "hsa" prefix; pathways are joined with "|".
process_tsv :gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ' do
  headers ['KEGG Gene ID', 'KEGG Pathway ID']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect { |name| "hsa" + name }
    "hsa:#{ gene }\t#{pathway_ids.join("|")}"
  end
end
111
+
112
# Default tasks. This Rakefile defines no :genes task; :identifiers is the
# gene table it actually builds.
# NOTE(review): confirm :genes was not provided by rake_helper.
add_to_defaults [:pathways, :drugs, :gene_drug, :identifiers]
113
+
114
+
@@ -0,0 +1,211 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# PharmGKB downloads, keyed by source name (the tasks in this Rakefile
# refer to them as 'source/<name>' or by bare name in process_tsv).
phgkb_download = "http://www.pharmgkb.org/commonFileDownload.action?filename="

phgkb_sources = {
  "genes"         => phgkb_download + "genes.zip",
  "drugs"         => phgkb_download + "drugs.zip",
  "diseases"      => phgkb_download + "diseases.zip",
  "relationships" => phgkb_download + "relationships.zip",
  "variants"      => phgkb_download + "variantAnnotations.zip",
  "pathways"      => phgkb_download + "pathways-tsv.zip"
}

define_source_tasks phgkb_sources
9
+
10
+
11
# Line clean-up shared by the three tables below: `","` becomes "|", the
# remaining double quotes are removed, and a comma right before a tab or
# the end of the line is dropped.
unquote_fields = proc { |l| l.gsub(/","/, '|').gsub(/"/, '').gsub(/,(\t|$)/, '\1') }

process_tsv :diseases, 'diseases', :header_hash => "", :fix => unquote_fields do
  headers ['PhGKB Disease ID']
end

process_tsv :identifiers, 'genes', :header_hash => "", :fix => unquote_fields do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

process_tsv :drugs, 'drugs',
  :header_hash => "",
  :fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
  :fix => unquote_fields do
  headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
end
29
+
30
# Relationships table. Each input line is rewritten to four columns:
# "<col 0>:<col 2>" as the relationship key, followed by the PMIDs,
# pathways and RSIDs found in the last column, each joined by "|".
process_tsv :relationships, 'relationships',
  :header_hash => "",
  :merge => true,
  :fix => proc { |l|
    # The type prefixes are removed from the line in place.
    l.gsub!(/Gene:|Drug:|Disease:/, '')

    columns = l.split("\t")
    evidence = columns.pop

    pmids, pathways, rsids = [], [], []
    evidence.split(',').each do |r|
      if r =~ /PMID:(.*)/
        pmids << $1
      elsif r =~ /Pathway:(.*)/
        pathways << $1
      elsif r =~ /RSID:(.*)/
        rsids << $1
      end
    end

    key = columns.values_at(0, 2) * ":"
    [key, pmids * "|", pathways * "|", rsids * "|"] * "\t"
  },
  :keep_empty => true do

  headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
end
62
+
63
+
64
# Gene/drug pairs taken from the relationship lines that start with a gene
# and mention a drug. Each line is reduced to its columns 0 and 2.
process_tsv :gene_drug, 'relationships',
  :select => proc { |l| l =~ /^Gene:/ && l =~ /Drug:/ },
  :header_hash => "",
  :merge => true,
  :fix => proc { |l|
    # The type prefixes are removed from the line in place.
    l.gsub!(/Gene:|Drug:|Disease:/, '')

    columns = l.split("\t")
    columns.pop # drop the trailing evidence column
    columns.values_at(0, 2) * "\t"
  },
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Drug ID']
end
80
+
81
# Gene/disease pairs taken from the relationship lines that start with a
# gene and mention a disease.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
  :key_field => 1,
  :fields => 3,
  :merge => true,
  :header_hash => "",
  # Strip the full "Disease:" prefix, colon included, as the other
  # relationship tables do; without the colon every disease id kept a
  # leading ":".
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end
92
+
93
# Variant annotations table keyed by variant id.
process_tsv :variants, 'variants',
  :key_field => 1,
  :fields => [3,7,8,9,10,4,6,5],
  :header_hash => "",
  :merge => true,
  # Only the "Disease:" type prefix is removed. The previous pattern had no
  # colon, so it also deleted the plain word "Disease" from any column and
  # left a stray ":" behind.
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
end
103
+
104
# Builds the pathways table from the "PA<digits>: <name>" header lines of
# the pathways source. A trailing " - (<source>)" becomes the annotation
# source column; otherwise that column is empty.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts("#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t")

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      if line =~ /(PA\d+): (.*) - \((.*)\)/
        out.puts([$1, $2, $3] * "\t")
      elsif line =~ /(PA\d+): (.*)/
        out.puts([$1, $2, ""] * "\t")
      end
    end
  end
end
117
+
118
# Builds the gene_pathway table: pathway id, pathway name and the names of
# the genes listed under it, joined by "|". Pathways without genes are
# omitted.
#
# This task used to be defined twice; rake runs both actions, and the first
# one parsed the whole source only to discard the result. A single
# definition does the same work once.
file :gene_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      # Header line: starts a new pathway.
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
    else
      type, _code, name = line.split(/\t/)
      next unless type == 'Gene'
      # A gene row before any pathway header has nothing to attach to.
      next if last_pathway.nil?
      (pathways[last_pathway][:genes] ||= []) << name
    end
  end

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
    pathways.each do |pathway, info|
      next if info[:genes].nil?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
    end
  end
end
159
+
160
# Builds the pathway_drugs table: pathway id and the codes of the drugs
# listed under it, joined by "|". Pathways without drugs are omitted.
file :pathway_drugs => 'source/pathways' do |t|
  pathways = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      # Header line: starts a new pathway.
      current = $1
      pathways[current] = {:name => $2}
      next
    end

    type, code, _name = line.split(/\t/)
    next unless type == 'Drug'
    (pathways[current][:drugs] ||= []) << code
  end

  File.open(t.name, 'w') do |out|
    out.puts("#" + ["PhGKB Pathway ID", "PhGKB Drug ID"] * "\t")
    pathways.each do |pathway, info|
      drugs = info[:drugs]
      out.puts("#{ pathway }\t#{drugs * "|"}") unless drugs.nil?
    end
  end
end
184
+
185
+
186
# Builds the disease_pathway table: pathway id, pathway name and the names
# of the diseases listed under it, joined by "|". Pathways without diseases
# are omitted.
file :disease_pathway => 'source/pathways' do |t|
  pathways = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      # Header line: starts a new pathway.
      current = $1
      pathways[current] = {:name => $2}
      next
    end

    type, _code, name = line.split(/\t/)
    next unless type == 'Disease'
    (pathways[current][:diseases] ||= []) << name
  end

  File.open(t.name, 'w') do |out|
    out.puts("#" + %w(ID Name Diseases) * "\t")
    pathways.each do |pathway, info|
      diseases = info[:diseases]
      out.puts("#{ pathway }\t#{info[:name]}\t#{diseases * "|"}") unless diseases.nil?
    end
  end
end
210
+
211
# Default tasks. The drug table task in this Rakefile is named
# :pathway_drugs; :drug_pathway is not defined anywhere in it.
add_to_defaults [:gene_pathway, :pathway_drugs, :disease_pathway]
@@ -0,0 +1,16 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"

# Line clean-up for the PINA dump: drops the "uniprotkb:", "(gene name)"
# and "pubmed:" markers, turns "|" into ";;" and removes any remaining
# parenthesised text.
pina_clean_line = lambda { |l|
  l.gsub("uniprotkb:", '').
    gsub("(gene name)", '').
    gsub("pubmed:", '').
    gsub("|", ';;').
    gsub(/\([^)]+\)/, '')
}

# Protein/protein interaction table keyed by the first column, keeping
# columns 1, 6 and 8.
process_tsv :protein_protein, 'Homo sapiens-20110628.txt',
  :key => 0,
  :fix => pina_clean_line,
  :fields => [1,6,8],
  :header_hash => "#",
  :merge => true,
  :keep_empty => true do

  headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
end
15
+
16
+
@@ -0,0 +1,30 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals" => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"

# Protein/chemical links restricted to lines containing the "9606." prefix,
# which is then removed from the line.
process_tsv :protein_chemical, 'protein_chemicals',
  :key => 1,
  # Single quotes keep the backslash so the dot is matched literally; in
  # double quotes "\." collapses to ".", which matches any character.
  :grep => '9606\.',
  :fix => lambda{|l| l.sub(/9606\./,'')},
  :keep_empty => true do

  headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
end

# The chemicals table is filtered by the chemical ids present in
# protein_chemical. The :grep option receives this (initially empty) array
# object, which the block below fills in place with `replace`.
# NOTE(review): this relies on process_tsv reading :grep only after the
# block has run -- confirm against rake_helper.
$grep_re = []
process_tsv :chemicals, 'chemicals',
  :grep => $grep_re,
  :key => 0 do

  # The ids come from the protein_chemical file, so build it first.
  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  chemicals = TSV.open('protein_chemical', :key_field => 1, :fields => []).keys
  Log.debug "Getting chemicals [done]"

  $grep_re.replace chemicals

  headers ['STITCH Chemical ID', 'Name', 'Source']
end
30
+
@@ -0,0 +1,8 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"

# Removes every "9606." prefix from a line.
string_strip_taxon = lambda { |l| l.gsub(/9606\./, '') }

# Protein/protein links restricted to lines matching "9606\.ENSP".
process_tsv :protein_protein, 'protein_protein',
  :grep => '9606\.ENSP',
  :fix => string_strip_taxon,
  :merge => true,
  :sep => "\s" do

  headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
end
8
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.7
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-21 00:00:00.000000000 Z
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -105,24 +105,34 @@ files:
105
105
  - lib/rbbt/sources/go.rb
106
106
  - lib/rbbt/sources/gscholar.rb
107
107
  - lib/rbbt/sources/jochem.rb
108
+ - lib/rbbt/sources/kegg.rb
109
+ - lib/rbbt/sources/matador.rb
108
110
  - lib/rbbt/sources/organism.rb
109
111
  - lib/rbbt/sources/pfam.rb
112
+ - lib/rbbt/sources/pharmagkb.rb
113
+ - lib/rbbt/sources/pina.rb
110
114
  - lib/rbbt/sources/polysearch.rb
111
115
  - lib/rbbt/sources/pubmed.rb
112
116
  - lib/rbbt/sources/reactome.rb
117
+ - lib/rbbt/sources/stitch.rb
118
+ - lib/rbbt/sources/string.rb
113
119
  - lib/rbbt/sources/tfacts.rb
114
120
  - lib/rbbt/sources/uniprot.rb
115
121
  - lib/rbbt/sources/wgEncodeBroadHmm.rb
116
122
  - share/Ensembl/release_dates
117
123
  - share/install/Genomes1000/Rakefile
118
124
  - share/install/JoChem/Rakefile
125
+ - share/install/KEGG/Rakefile
119
126
  - share/install/NCI/Rakefile
120
127
  - share/install/Organism/Hsa/Rakefile
121
128
  - share/install/Organism/Mmu/Rakefile
122
129
  - share/install/Organism/Rno/Rakefile
123
130
  - share/install/Organism/Sce/Rakefile
124
131
  - share/install/Organism/organism_helpers.rb
132
+ - share/install/PharmaGKB/Rakefile
133
+ - share/install/Pina/Rakefile
125
134
  - share/install/STITCH/Rakefile
135
+ - share/install/STRING/Rakefile
126
136
  - share/install/lib/helpers.rb
127
137
  - test/rbbt/sources/test_biomart.rb
128
138
  - test/rbbt/sources/test_entrez.rb