rbbt-sources 2.1.7 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daf367338fb6e78d2cb7b76440e67712d27f34ab
4
- data.tar.gz: 5b7a7308779ec4441fa2eb997d6f9b7f0dd37e3a
3
+ metadata.gz: 00cd4a9602b9ea2637a620b30cd3d48a6d63a9fe
4
+ data.tar.gz: c282f8c86de5148343e5a83ea524cdc09435b9fb
5
5
  SHA512:
6
- metadata.gz: bb568b0d788284e82d0ac0d9cdbd14db7c0e59b4977ddce57e2701f25ca18bbef93d43424179a188f73daaacc87963d039a17aaf0916872945f2d384e6441552
7
- data.tar.gz: b24f422176f10f518f692a7878c2389df0276df27a074feb5918bae1993f860fae4558d330f95de66a7857da712b5c811e487c0b117f553106215d1065f856af
6
+ metadata.gz: 701a67455ca18d9c705e2c409628cd5463f7449d2ee40ba4d26cce6f203018db21b9c6ee6f233cf8d80e44e28d3ffcfa08e474678b538b8db7cb80c44e5eac5a
7
+ data.tar.gz: 4bbcf6f222c01c5f3314617ed7c2458b3cebb9d8b3293ac631305ea2c610c935792fe0e5d6a7402f041aab4304e5586a7989f1e2a097b5dc620f0cb7a208250c
@@ -0,0 +1,127 @@
1
+ require 'rbbt'
2
+ require 'rbbt/resource'
3
+
4
# KEGG data exposed through the Resource mixin, together with memoized
# lookup tables built from the resource's TSV files.
module KEGG
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/kegg"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.KEGG.
  KEGG.claim KEGG.root, :rake, Rbbt.share.install.KEGG.Rakefile.find(:lib)

  # "Pathway Name" column of KEGG.pathways, loaded once and cached.
  def self.names
    @@names ||= KEGG.pathways.tsv(
      :fields  => ["Pathway Name"],
      :persist => true,
      :type    => :single,
      :unnamed => true
    )
  end

  # "Pathway Description" column of KEGG.pathways, loaded once and cached.
  def self.descriptions
    @@descriptions ||= KEGG.pathways.tsv(
      :fields  => ["Pathway Description"],
      :persist => true,
      :type    => :single,
      :unnamed => true
    )
  end

  # KEGG.gene_pathway re-keyed on "KEGG Pathway ID" with the "KEGG Gene ID"
  # values as a flat, merged list; cached after the first call.
  def self.index2genes
    @@index2genes ||= KEGG.gene_pathway.tsv(
      :key_field => "KEGG Pathway ID",
      :fields    => ["KEGG Gene ID"],
      :persist   => true,
      :type      => :flat,
      :merge     => true
    )
  end

  # Index over KEGG.identifiers with no explicit target; cached.
  # NOTE(review): presumably resolves to the "Ensembl Gene ID" key column of
  # the identifiers file -- confirm against the Resource/TSV index semantics.
  def self.index2ens
    @@index2ens ||= KEGG.identifiers.index(:persist => true)
  end

  # Index over KEGG.identifiers targeting "KEGG Gene ID"; cached.
  def self.index2kegg
    @@index2kegg ||= KEGG.identifiers.index(:target => "KEGG Gene ID", :persist => true)
  end

  # Name stored for a pathway id in the names table.
  def self.id2name(id)
    names[id]
  end

  # Ids of every pathway whose name starts with +name+, ignoring case.
  # Any StandardError raised while searching yields an empty list.
  def self.name2id(name)
    hits = names.select { |_id, label| label.downcase.index(name.downcase) == 0 }
    hits.collect { |id, _label| id }
  rescue
    []
  end

  # Description stored for a pathway id in the descriptions table.
  def self.description(id)
    descriptions[id]
  end
end
46
+
47
+ if defined? Entity
48
+
49
# Entity for KEGG pathway identifiers, annotated with an organism.
module KeggPathway
  extend Entity
  self.format = "KEGG Pathway ID"

  self.annotation :organism

  # Tells whether +entity+ matches +query+: either they are equal, or the
  # pathway name of +entity+ contains +query+.
  #
  # +options+ may be nil (its default); an empty hash is used in that case
  # instead of calling #merge on nil. A pathway without a known name never
  # matches, rather than raising on nil.
  def self.filter(query, field = nil, options = nil, entity = nil)
    return true if query == entity

    setup_options = (options || {}).merge(:format => field)
    name = KeggPathway.setup(entity.dup, setup_options).name

    !name.nil? && !name.index(query).nil?
  end

  # Pathway name from KEGG.id2name with any trailing " - Homo..." suffix
  # removed; nil when the id has no name.
  property :name => :single2array do
    return nil if self.nil?
    name = KEGG.id2name(self)
    name.sub(/ - Homo.*/,'') unless name.nil?
  end

  # Pathway description from KEGG.description.
  property :description => :single2array do
    KEGG.description(self)
  end

  # Gene lists for each pathway from KEGG.index2genes. The organism (first
  # argument, or this entity's own) is assigned to every value that responds
  # to :organism.
  property :genes => :array2single do |*args|
    organism = args.first || self.organism
    KEGG.index2genes.values_at(*self).
      each{|gene| gene.organism = organism if gene.respond_to? :organism }
  end
end
79
+
80
# Adds KEGG identifier translation and pathway lookup to Gene, but only when
# Gene is already defined and is an Entity.
if defined?(Gene) && Entity === Gene
  module Gene
    self.format = "KEGG Gene ID"

    # Returns these genes set up as "KEGG Gene ID" entities, going through
    # "Ensembl Gene ID" and KEGG.index2kegg. Returns self when the format is
    # already "KEGG Gene ID".
    def to_kegg
      return self if format == "KEGG Gene ID"

      index = KEGG.index2kegg
      ensembl = to("Ensembl Gene ID")
      kegg_ids = Array === self ? index.values_at(*ensembl) : index[ensembl]

      translated = Gene.setup(kegg_ids, "KEGG Gene ID", organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    end

    # Returns these genes set up as "Ensembl Gene ID" entities using
    # KEGG.index2ens. Returns self unless the format is "KEGG Gene ID".
    def from_kegg
      return self unless format == "KEGG Gene ID"

      index = KEGG.index2ens
      ensembl_ids = Array === self ? index.values_at(*self) : index[self]

      translated = Gene.setup(ensembl_ids, "Ensembl Gene ID", organism)
      translated.extend AnnotatedArray if AnnotatedArray === self
      translated
    end

    # KEGG.gene_pathway keyed on "KEGG Gene ID" with the "KEGG Pathway ID"
    # values as a flat, merged list; cached after the first call.
    def self.gene_kegg_pathway_index
      @@gene_kegg_pathway_index ||=
        KEGG.gene_pathway.tsv(
          :persist   => true,
          :key_field => "KEGG Gene ID",
          :fields    => ["KEGG Pathway ID"],
          :type      => :flat,
          :merge     => true
        )
    end

    # Translates the genes to +new_format+. KEGG ids are first converted to
    # Ensembl ids (from_kegg) before running the Translation :tsv_translate
    # job; a request for "KEGG Gene ID" is delegated to to_kegg.
    property :to => :array2single do |new_format|
      if format == new_format
        self
      elsif format == "KEGG Gene ID"
        ensembl = from_kegg.clean_annotations
        job = Translation.job(:tsv_translate, "", :organism => organism, :genes => ensembl, :format => new_format)
        translated = Gene.setup(job.exec.chunked_values_at(ensembl), new_format, organism)
        translated.extend AnnotatedArray if AnnotatedArray === self
        translated
      elsif new_format == "KEGG Gene ID"
        to_kegg
      else
        job = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format)
        translated = Gene.setup(job.exec.chunked_values_at(self), new_format, organism)
        translated.extend AnnotatedArray if AnnotatedArray === self
        translated
      end
    end

    # KEGG pathways for each gene, looked up by KEGG id and set up as
    # KeggPathway; the result is memoized in @kegg_pathways.
    property :kegg_pathways => :array2single do
      @kegg_pathways ||= begin
        pathways = Gene.gene_kegg_pathway_index.values_at(*self.to_kegg)
        pathways.each { |pth| pth.organism = organism if pth.respond_to? :organism }
        KeggPathway.setup(pathways, organism)
        pathways
      end
    end
  end
end
127
+ end
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
# Matador data exposed through the Resource mixin.
module Matador
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/matador"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.Matador.
  resource_root = Matador.root
  rakefile = Rbbt.share.install.Matador.Rakefile.find(:lib)
  Matador.claim resource_root, :rake, rakefile
end
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
# PharmaGKB data exposed through the Resource mixin.
module PharmaGKB
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/pharmagkb"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.PharmaGKB.
  resource_root = PharmaGKB.root
  rakefile = Rbbt.share.install.PharmaGKB.Rakefile.find(:lib)
  PharmaGKB.claim resource_root, :rake, rakefile
end
@@ -0,0 +1,35 @@
1
+ require 'phgx'
2
+
3
# Pina data exposed through the Resource mixin.
module Pina
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/pina"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.Pina.
  resource_root = Pina.root
  rakefile = Rbbt.share.install.Pina.Rakefile.find(:lib)
  Pina.claim resource_root, :rake, rakefile
end
10
+
11
# Adds the :pina_interactors property to Gene, but only when Entity and Gene
# are already defined and Gene is an Entity.
if defined?(Entity) && defined?(Gene) && Entity === Gene
  require 'rbbt/entity/gene'
  require 'rbbt/entity/interactor'
  require 'rbbt/sources/PSI_MI'

  module Gene
    # For each gene, collects its Pina interactors: the gene's Ensembl id is
    # mapped to UniProt accessions, each accession is looked up in the Pina
    # protein_protein table, and every interactor is set up as an Interactor
    # with its methods (PSI_MITerm) and articles (PMID), both split on ";;".
    property :pina_interactors => :array2single do
      ens2uniprot = Organism.identifiers(organism).tsv(
        :key_field => "Ensembl Gene ID",
        :fields    => ["UniProt/SwissProt Accession"],
        :type      => :flat,
        :persist   => true,
        :unnamed   => true
      )
      pina = Pina.protein_protein.tsv(
        :persist => true,
        :fields  => ["Interactor UniProt/SwissProt Accession", "Method", "PMID"],
        :type    => :double,
        :merge   => true,
        :unnamed => true
      )

      per_gene = self.ensembl.collect do |ens|
        accessions = ens2uniprot[ens]
        entries = pina.values_at(*accessions).compact

        interactors = entries.collect do |values|
          Misc.zip_fields(values).collect do |partner, method, articles|
            methods = PSI_MITerm.setup(method.split(";;"))
            pmids = PMID.setup(articles.split(";;"))
            Interactor.setup(partner, methods, pmids)
          end
        end

        list = interactors.flatten.uniq
        Gene.setup(list, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
      end

      Gene.setup(per_gene, "UniProt/SwissProt Accession", organism).extend(AnnotatedArray)
    end
  end
end
35
+
@@ -0,0 +1,9 @@
1
+ require 'phgx'
2
+
3
# STITCH data exposed through the Resource mixin.
module STITCH
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/stitch"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.STITCH.
  resource_root = STITCH.root
  rakefile = Rbbt.share.install.STITCH.Rakefile.find(:lib)
  STITCH.claim resource_root, :rake, rakefile
end
@@ -0,0 +1,27 @@
1
+ require 'phgx'
2
+
3
# STRING data exposed through the Resource mixin.
module STRING
  extend Resource

  self.pkgdir = "phgx"
  self.subdir = "share/string"

  # Claim the resource root with the :rake type, pointing at the Rakefile
  # located through Rbbt.share.install.STRING.
  resource_root = STRING.root
  rakefile = Rbbt.share.install.STRING.Rakefile.find(:lib)
  STRING.claim resource_root, :rake, rakefile
end
10
+
11
# Adds the :string_interactors property to Gene, but only when Entity and
# Gene are already defined and Gene is an Entity.
if defined?(Entity) && defined?(Gene) && Entity === Gene
  module Gene
    # For each gene, gathers the interactors of its proteins in the STRING
    # protein_protein table whose score (as an integer) is above the
    # threshold, then maps those proteins back to genes.
    #
    # The optional first argument is the score threshold (800 when omitted).
    property :string_interactors => :array2single do |*args|
      threshold = args.first || 800
      string = STRING.protein_protein.tsv(:unnamed => true, :persist => true, :type => :double)

      all = self.ensembl.collect do |gene|
        per_protein = gene.proteins.collect do |protein|
          pairs = Misc.zip_fields((string[protein] || [[],[]]))
          strong = pairs.select { |_partner, score| score.to_i > threshold }
          strong.collect { |partner, _score| partner }
        end
        interactors = per_protein.compact.flatten.uniq

        Protein.setup(interactors, "Ensembl Protein ID", organism).transcript.gene.compact.uniq
      end

      # Carry the annotations of the first non-nil result over to the outer list.
      first_result = all.compact.first
      first_result.annotate all if Annotated === first_result

      all
    end
  end
end
27
+
@@ -0,0 +1,114 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Source files used by the tasks below, keyed by the name they are referred
# to as under source/.
define_source_tasks(
  "h.sapiens"        => "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/H.sapiens.ent",
  "hsa_gene_map.tab" => "ftp://ftp.genome.jp/pub/kegg/pathway/organisms/hsa/hsa_gene_map.tab",
  "drugs"            => "ftp://ftp.genome.jp/pub/kegg/medicus/drug/drug",
  "pathways"         => "ftp://ftp.genome.jp/pub/kegg/pathway/pathway"
)
7
+
8
+
9
# Builds the Ensembl Gene ID / KEGG Gene ID table from source/h.sapiens.
# Every "ENTRY <number>" line sets the current entry; a later
# "Ensembl: ENSG..." reference is recorded for that entry (the last one seen
# wins). KEGG ids are written with the "hsa:" prefix.
file :identifiers => 'source/h.sapiens' do |t|
  pairs = {}
  entry = nil

  # Iterate with each_line: String#each no longer exists on Ruby >= 1.9.
  # Assumes Open.read returns the file content as a String, as the other
  # tasks in this Rakefile do when they call split on it.
  Open.read(t.prerequisites.first).each_line do |line|
    if line =~ /^ENTRY\s+(\d+)/
      entry = $1
      next
    end

    if line =~ /Ensembl: (ENSG\d+)/
      pairs[entry] = $1
    end
  end

  header = ['#Ensembl Gene ID','KEGG Gene ID'] * "\t"
  rows = pairs.collect { |kegg, ens| [ens, "hsa:" + kegg] * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end
25
+
26
# Builds the KEGG Gene ID / KEGG Drug ID table from source/drugs. The file is
# cut into chunks by the scan below; ENTRY chunks set the current drug and
# chunks with a "TARGET ... [HSA:...]" section list the genes it targets.
#
# NOTE(review): the scan pattern consumes the first capital letter of the
# following field and uses the /s flag (an encoding flag in Ruby, not
# "dot matches newline") -- confirm this chunking is what is intended.
file :gene_drug => 'source/drugs' do |t|
  pairs = {}
  drug = nil

  chunks = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?:^[A-Z])/sm)
  relevant = chunks.select { |chunk| chunk =~ /^ENTRY|TARGET/ }
  relevant = relevant.collect { |chunk| chunk.sub(/\s+/,' ') }

  relevant.each do |line|
    case line
    when /^ENTRY\s+(\w+)/
      drug = $1
    when /TARGET.*?\[HSA:(.*?)\]/
      $1.split(/\s/).each do |gene|
        (pairs[gene] ||= []) << drug
      end
    end
  end

  header = ['#KEGG Gene ID', 'KEGG Drug ID'] * "\t"
  rows = pairs.collect { |gene, drugs| ["hsa:" + gene, drugs * "|" ] * "\t" }
  Open.write(t.name, header + "\n" + rows * "\n")
end
47
+
48
# Builds the KEGG drug table (id, names, PubChem ids) from source/drugs.
# ENTRY chunks set the current drug; NAME chunks contribute ';'-separated
# names and DBLINKS chunks contribute the "PubChem: <digits>" id, if present.
#
# NOTE(review): the scan pattern consumes the first capital letter of the
# following field and uses the /s flag (an encoding flag in Ruby, not
# "dot matches newline") -- confirm this chunking is what is intended.
file :drugs => 'source/drugs' do |t|
  info = {}
  drug = nil

  chunks = Open.read(t.prerequisites.first).scan(/^[A-Z].*?(?:^[A-Z])/sm)
  relevant = chunks.select { |chunk| chunk =~ /^ENTRY|NAME|DBLINKS/ }
  relevant = relevant.collect { |chunk| chunk.sub(/\s+/,' ') }

  relevant.each do |line|
    if line =~ /^ENTRY\s+(\w+)/
      drug = $1
      next
    end

    if line =~ /^NAME(.*)/
      $1.split(/;/).each do |name|
        info[drug] ||= [[],[]]
        info[drug][0] << name.chomp.strip
      end
    end

    if line =~ /^DBLINKS(.*)/
      pubchem = $1[/PubChem: (\d*)/, 1]
      next unless pubchem
      info[drug] ||= [[],[]]
      info[drug][1] << pubchem.chomp.strip
    end
  end

  header = ['#KEGG Drug ID', 'KEGG Drug Name', 'PubChem Drug ID'] * "\t"
  rows = info.collect do |id, columns|
    [id, columns.collect { |values| values * "|" }].flatten * "\t"
  end
  Open.write(t.name, header + "\n" + rows * "\n")
end
77
+
78
# Builds the KEGG pathway table (id, name, description, class) from
# source/pathways. An ENTRY line sets the current pathway; NAME, DESCRIPTION
# and CLASS lines store their text for it. Only pathways that have a NAME are
# written out.
file :pathways => 'source/pathways' do |t|
  descs = {}
  names = {}
  klass = {}
  pathway = nil

  # Each pattern is checked independently on every line, after ENTRY.
  stores = [
    [/NAME (.*)/, names],
    [/DESCRIPTION (.*)/, descs],
    [/CLASS (.*)/, klass]
  ]

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    pathway = $1.strip if line =~ /ENTRY\s+(\w+)/

    stores.each do |pattern, store|
      store[pathway] = $1.strip if line =~ pattern
    end
  end

  preamble = "#: :type=:list\n"
  header = ['#KEGG Pathway ID', 'Pathway Name', 'Pathway Description', 'Pathway Class'] * "\t"
  rows = names.keys.collect do |id|
    [id, names[id], descs[id], klass[id]] * "\t"
  end
  Open.write(t.name, preamble + header + "\n" + rows * "\n")
end
103
+
104
# Gene / pathway table from hsa_gene_map.tab: gene ids get the "hsa:" prefix
# and every pathway name gets the "hsa" prefix, joined with "|".
process_tsv(:gene_pathway, 'hsa_gene_map.tab', :sep2 => ' ') do
  headers ['KEGG Gene ID', 'KEGG Pathway ID']

  data do |gene, pathway|
    pathway_ids = pathway.flatten.collect { |name| "hsa" + name }
    "hsa:#{ gene }\t#{pathway_ids * "|"}"
  end
end

# NOTE(review): no :genes task is defined in the visible part of this
# Rakefile (the gene tables here are :identifiers and :gene_pathway) --
# confirm it is provided by rake_helper or adjust this list.
add_to_defaults [:pathways, :drugs, :gene_drug, :genes]
113
+
114
+
@@ -0,0 +1,211 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
# Source archives used by the tasks below, keyed by the name they are
# referred to as under source/.
define_source_tasks(
  "genes"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=genes.zip",
  "drugs"         => "http://www.pharmgkb.org/commonFileDownload.action?filename=drugs.zip",
  "diseases"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=diseases.zip",
  "relationships" => "http://www.pharmgkb.org/commonFileDownload.action?filename=relationships.zip",
  "variants"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=variantAnnotations.zip",
  "pathways"      => "http://www.pharmgkb.org/commonFileDownload.action?filename=pathways-tsv.zip"
)
9
+
10
+
11
# Line clean-up shared by the three tables below: turns the "," between
# quoted values into "|", drops the remaining double quotes, and removes a
# comma left right before a tab or the end of the line.
unquote_line = proc { |l| l.gsub(/","/,'|').gsub(/"/,'').gsub(/,(\t|$)/,'\1') }

# Disease table; only the key column is renamed.
process_tsv(:diseases, 'diseases', :header_hash => "", :fix => unquote_line) do
  headers ['PhGKB Disease ID']
end

# Gene identifier table.
# NOTE(review): 'Ensembl Gene Id' is spelled with a lowercase "d", unlike the
# "Ensembl Gene ID" header used by the KEGG sources -- confirm it is intended.
process_tsv(:identifiers, 'genes', :header_hash => "", :fix => unquote_line) do
  headers ['PhGKB Gene ID', 'Entrez Gene ID', 'Ensembl Gene Id', 'UniProt/SwissProt Accession', 'Long Name', 'Associated Gene Name']
end

# Drug table restricted to the name, DrugBank, SMILES and MeSH columns.
process_tsv(:drugs, 'drugs',
            :header_hash => "",
            :fields => ['Name', 'DrugBank Id', 'SMILES', "MeSH IDs"],
            :fix => unquote_line) do
  headers ['PhGKB Drug ID', 'Drug Name', 'DrugBank Id', 'SMILES', "MeSH ID"]
end
29
+
30
# Relationship table. Each line is rewritten by the :fix proc: the "Gene:",
# "Drug:" and "Disease:" prefixes are removed in place, columns 0 and 2 are
# joined with ":" to form the key, and the last column is split on "," into
# PMID, Pathway and RSID references, each group joined with "|".
process_tsv(:relationships, 'relationships',
            :header_hash => "",
            :merge => true,
            :fix => proc { |l|
              l.gsub!(/Gene:|Drug:|Disease:/,'')
              columns = l.split("\t")
              rels = columns.pop
              key = columns.values_at(0, 2) * ":"

              pmids = []
              pathways = []
              rsids = []
              rels.split(',').each do |r|
                case r
                when /PMID:(.*)/
                  pmids << $1
                when /Pathway:(.*)/
                  pathways << $1
                when /RSID:(.*)/
                  rsids << $1
                end
              end

              [key, pmids * "|", pathways * "|", rsids * "|"] * "\t"
            },
            :keep_empty => true) do

  headers ['PhGKB Relationship', "PMID", "PhGKB Pathway ID", "Variant ID"]
end
62
+
63
+
64
# Gene / drug pairs taken from the relationship lines that start with "Gene:"
# and also mention "Drug:". The :fix proc removes the entity prefixes in
# place, discards the last column and keeps columns 0 and 2.
process_tsv(:gene_drug, 'relationships',
            :select => proc { |l| l =~ /^Gene:/ && l =~ /Drug:/ },
            :header_hash => "",
            :merge => true,
            :fix => proc { |l|
              l.gsub!(/Gene:|Drug:|Disease:/,'')
              columns = l.split("\t")
              # The last column must be removed before picking indexes 0 and 2.
              columns.pop
              columns.values_at(0, 2) * "\t"
            },
            :keep_empty => true) do

  headers ['PhGKB Gene ID', 'PhGKB Drug ID']
end
80
+
81
# Gene / disease pairs taken from the relationship lines that start with
# "Gene:" and also mention "Disease:", using column 1 as key and column 3 as
# value.
#
# The :fix proc strips the "Gene:", "Drug:" and "Disease:" prefixes. The
# colon after "Disease" is required: without it the prefix removal leaves a
# leading ":" on every disease id, unlike the sibling relationship tasks.
process_tsv :gene_disease, 'relationships',
  :select => proc{|l| l =~ /^Gene:/ && l =~ /Disease:/},
  :key_field => 1,
  :fields => 3,
  :merge => true,
  :header_hash => "",
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['PhGKB Gene ID', 'PhGKB Disease ID']
end
92
+
93
# Variant annotation table keyed on column 1, with the listed columns
# reordered into the headers below.
#
# The :fix proc strips the "Gene:", "Drug:" and "Disease:" prefixes. The
# colon after "Disease" is required: matching the bare word would also delete
# "Disease" from any free-text value and leave a stray ":" behind on prefixed
# ids, unlike the sibling relationship tasks.
process_tsv :variants, 'variants',
  :key_field => 1,
  :fields => [3,7,8,9,10,4,6,5],
  :header_hash => "",
  :merge => true,
  :fix => proc{|l| l.gsub(/Gene:|Drug:|Disease:/,'')},
  :keep_empty => true do

  headers ['Variant ID', 'Associated Gene Name', 'Drug', 'Drug_Class', 'Disease', 'Curation', 'Feature', 'Annotation', 'Evidence']
end
103
+
104
# Builds the pathway table from source/pathways. Lines of the form
# "PA<digits>: <name> - (<source>)" give id, name and annotation source;
# lines of the form "PA<digits>: <name>" give id and name with an empty
# source. Other lines are ignored.
file :pathways => 'source/pathways' do |t|
  File.open(t.name, 'w') do |out|
    out.puts "#" + ['PhGKB Pathway ID','Pathway Name','Pathway Annotation Source'] * "\t"

    Open.read(t.prerequisites.first).split(/\n/).each do |line|
      case line
      when /(PA\d+): (.*) - \((.*)\)/
        out.puts [$1,$2,$3] * "\t"
      when /(PA\d+): (.*)/
        out.puts [$1,$2,""] * "\t"
      end
    end
  end
end
117
+
118
# Builds the pathway / gene table from source/pathways. A line matching
# "P...:..." starts a new pathway (id before the last colon matched, name
# after it); the tab-separated lines that follow whose first column is
# "Gene" contribute their third column to that pathway. Pathways without
# genes are not written.
#
# This task used to be declared twice; the first declaration parsed the file
# and wrote nothing, so only this single definition is kept.
file :gene_pathway => 'source/pathways' do |t|
  pathways = {}
  last_pathway = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      last_pathway = $1
      pathways[last_pathway] = {:name => $2}
    else
      type, code, name = line.split(/\t/)
      next unless type =='Gene'
      pathways[last_pathway][:genes] ||= []
      pathways[last_pathway][:genes] << name
    end
  end

  File.open(t.name, 'w') do |f|
    f.puts "#" + ['PhGKB Pathway ID', 'Pathway Name', 'Associated Gene Name'] * "\t"
    pathways.each do |pathway, info|
      next if info[:genes].nil?
      f.puts "#{ pathway }\t#{info[:name]}\t#{info[:genes] * "|"}"
    end
  end
end
159
+
160
# Builds the pathway / drug table from source/pathways. A line matching
# "P...:..." starts a new pathway; the tab-separated lines that follow whose
# first column is "Drug" contribute their second column to that pathway.
# Pathways without drugs are not written.
file :pathway_drugs => 'source/pathways' do |t|
  pathways = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      current = $1
      pathways[current] = {:name => $2}
      next
    end

    type, code = line.split(/\t/)
    next unless type =='Drug'
    (pathways[current][:drugs] ||= []) << code
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + ["PhGKB Pathway ID", "PhGKB Drug ID"]* "\t"
    pathways.each do |pathway, info|
      drugs = info[:drugs]
      out.puts "#{ pathway }\t#{drugs * "|"}" unless drugs.nil?
    end
  end
end
184
+
185
+
186
# Builds the pathway / disease table from source/pathways. A line matching
# "P...:..." starts a new pathway; the tab-separated lines that follow whose
# first column is "Disease" contribute their third column to that pathway.
# Pathways without diseases are not written.
file :disease_pathway => 'source/pathways' do |t|
  pathways = {}
  current = nil

  Open.read(t.prerequisites.first).split(/\n/).each do |line|
    if line =~ /(P.*):(.*)/
      current = $1
      pathways[current] = {:name => $2}
      next
    end

    type, _code, name = line.split(/\t/)
    next unless type =='Disease'
    (pathways[current][:diseases] ||= []) << name
  end

  File.open(t.name, 'w') do |out|
    out.puts "#" + %w(ID Name Diseases) * "\t"
    pathways.each do |pathway, info|
      diseases = info[:diseases]
      out.puts "#{ pathway }\t#{info[:name]}\t#{diseases * "|"}" unless diseases.nil?
    end
  end
end
210
+
211
# Default products. The pathway/drug task in this Rakefile is named
# :pathway_drugs; the former :drug_pathway entry had no matching task.
add_to_defaults [:gene_pathway, :pathway_drugs, :disease_pathway]
@@ -0,0 +1,16 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks(
  "Homo sapiens-20110628.txt" => "http://cbg.garvan.unsw.edu.au/pina/download/Homo%20sapiens-20110628.txt"
)

# Protein-protein interaction table keyed on column 0, keeping columns 1, 6
# and 8. The :fix lambda removes the "uniprotkb:", "(gene name)" and
# "pubmed:" markers, turns "|" into ";;", and finally drops any remaining
# parenthesised text.
process_tsv(:protein_protein, 'Homo sapiens-20110628.txt',
            :key => 0,
            :fix => lambda { |l|
              cleaned = l.gsub("uniprotkb:", '')
              cleaned = cleaned.gsub("(gene name)",'')
              cleaned = cleaned.gsub("pubmed:",'')
              cleaned = cleaned.gsub("|", ';;')
              cleaned.gsub(/\([^)]+\)/,'')
            },
            :fields => [1,6,8],
            :header_hash => "#",
            :merge => true,
            :keep_empty => true) do

  headers ['UniProt/SwissProt Accession', 'Interactor UniProt/SwissProt Accession', 'Method', 'PMID']
end
15
+
16
+
@@ -0,0 +1,30 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks(
  "protein_chemicals" => "http://stitch.embl.de:8080/download/protein_chemical.links.v2.0.tsv.gz",
  "chemicals"         => "http://stitch.embl.de:8080/download/chemical.aliases.v2.0.tsv.gz"
)

# Protein / chemical links keyed on column 1, restricted through :grep to
# lines mentioning the 9606 prefix; the :fix lambda removes the first
# "9606." occurrence from each line.
process_tsv(:protein_chemical, 'protein_chemicals',
            :key => 1,
            :grep => "9606\.",
            :fix => lambda { |line| line.sub(/9606\./,'') },
            :keep_empty => true) do

  headers ['Ensembl Protein ID', 'STITCH Chemical ID', 'Score']
end
14
+
15
# Chemical alias table, limited to the chemicals present in protein_chemical.
# The array passed as :grep starts empty and is filled in place inside the
# block, after the protein_chemical task has been invoked.
$grep_re = []
process_tsv(:chemicals, 'chemicals', :grep => $grep_re, :key => 0) do

  Rake::Task['protein_chemical'].invoke

  Log.debug "Getting chemicals"
  protein_chemical = TSV.open('protein_chemical', :key_field => 1, :fields => [])
  chemicals = protein_chemical.keys
  Log.debug "Getting chemicals [done]"

  # replace keeps the same array object that was handed to :grep above.
  $grep_re.replace chemicals

  headers ['STITCH Chemical ID', 'Name', 'Source']
end
30
+
@@ -0,0 +1,8 @@
1
+ require File.join(File.dirname(__FILE__),'../lib/rake_helper')
2
+
3
define_source_tasks(
  "protein_protein" => "http://string-db.org/newstring_download/protein.links.v9.05.txt.gz"
)

# Protein-protein links restricted through :grep to lines mentioning
# 9606.ENSP; the :fix lambda removes every "9606." prefix.
process_tsv(:protein_protein, 'protein_protein',
            :grep => '9606\.ENSP',
            :fix => lambda { |line| line.gsub(/9606\./,'') },
            :merge => true,
            :sep => "\s") do
  headers ['Ensembl Protein ID', 'Interactor Ensembl Protein ID', 'Score']
end
8
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.7
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-21 00:00:00.000000000 Z
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -105,24 +105,34 @@ files:
105
105
  - lib/rbbt/sources/go.rb
106
106
  - lib/rbbt/sources/gscholar.rb
107
107
  - lib/rbbt/sources/jochem.rb
108
+ - lib/rbbt/sources/kegg.rb
109
+ - lib/rbbt/sources/matador.rb
108
110
  - lib/rbbt/sources/organism.rb
109
111
  - lib/rbbt/sources/pfam.rb
112
+ - lib/rbbt/sources/pharmagkb.rb
113
+ - lib/rbbt/sources/pina.rb
110
114
  - lib/rbbt/sources/polysearch.rb
111
115
  - lib/rbbt/sources/pubmed.rb
112
116
  - lib/rbbt/sources/reactome.rb
117
+ - lib/rbbt/sources/stitch.rb
118
+ - lib/rbbt/sources/string.rb
113
119
  - lib/rbbt/sources/tfacts.rb
114
120
  - lib/rbbt/sources/uniprot.rb
115
121
  - lib/rbbt/sources/wgEncodeBroadHmm.rb
116
122
  - share/Ensembl/release_dates
117
123
  - share/install/Genomes1000/Rakefile
118
124
  - share/install/JoChem/Rakefile
125
+ - share/install/KEGG/Rakefile
119
126
  - share/install/NCI/Rakefile
120
127
  - share/install/Organism/Hsa/Rakefile
121
128
  - share/install/Organism/Mmu/Rakefile
122
129
  - share/install/Organism/Rno/Rakefile
123
130
  - share/install/Organism/Sce/Rakefile
124
131
  - share/install/Organism/organism_helpers.rb
132
+ - share/install/PharmaGKB/Rakefile
133
+ - share/install/Pina/Rakefile
125
134
  - share/install/STITCH/Rakefile
135
+ - share/install/STRING/Rakefile
126
136
  - share/install/lib/helpers.rb
127
137
  - test/rbbt/sources/test_biomart.rb
128
138
  - test/rbbt/sources/test_entrez.rb