rbbt-entities 1.1.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/icgc2rbbt.rb +23 -0
- data/bin/vcf2rbbt.rb +15 -0
- data/lib/rbbt/entity/chromosome_range.rb +73 -0
- data/lib/rbbt/entity/cnv.rb +20 -2
- data/lib/rbbt/entity/gene.rb +147 -74
- data/lib/rbbt/entity/genomic_mutation.rb +380 -50
- data/lib/rbbt/entity/genotype.rb +10 -4
- data/lib/rbbt/entity/interactor.rb +6 -0
- data/lib/rbbt/entity/mutated_isoform.rb +171 -83
- data/lib/rbbt/entity/pmid.rb +33 -6
- data/lib/rbbt/entity/protein.rb +36 -7
- data/lib/rbbt/entity/transcript.rb +20 -4
- data/lib/rbbt/entity.rb +123 -68
- data/test/rbbt/entity/test_gene.rb +16 -2
- data/test/rbbt/entity/test_genomic_mutation.rb +53 -1
- data/test/rbbt/entity/test_pmid.rb +19 -0
- data/test/rbbt/test_entity.rb +100 -5
- metadata +51 -72
data/bin/icgc2rbbt.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
|
4
|
+
file = ARGV.shift
|
5
|
+
directory = ARGV.shift
|
6
|
+
|
7
|
+
genotypes = {}
|
8
|
+
Open.read(file).split("\n").each do |line|
|
9
|
+
next if line =~ /^Cancer Type/
|
10
|
+
|
11
|
+
chr, pos, ref, mut, sample = line.split(/\t/).values_at 2, 3, 6, 10, 35
|
12
|
+
|
13
|
+
chr.sub!(/chr/,'')
|
14
|
+
mut = '-' * (mut.length - 1) if mut =~/^-[ACGT]/
|
15
|
+
|
16
|
+
genotypes[sample] ||= []
|
17
|
+
genotypes[sample] << [chr, pos, mut] * ":"
|
18
|
+
end
|
19
|
+
|
20
|
+
genotypes.each do |sample, mutations|
|
21
|
+
mutations.uniq!
|
22
|
+
Open.write(File.join(directory, sample), mutations.uniq * "\n")
|
23
|
+
end
|
data/bin/vcf2rbbt.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
|
4
|
+
file = ARGV.shift
|
5
|
+
|
6
|
+
Open.read(file).split("\n").each do |line|
|
7
|
+
next if line =~ /^#/
|
8
|
+
|
9
|
+
chr, pos, id, ref, mut, score = line.split(/\t/)
|
10
|
+
|
11
|
+
chr.sub!(/chr/,'')
|
12
|
+
mut = mut + '-' * (ref.length - mut.length) if ref.length > mut.length
|
13
|
+
|
14
|
+
puts [chr, pos, mut, score] * ":"
|
15
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/entity/gene'
|
5
|
+
|
6
|
+
Workflow.require_workflow "Sequence"
|
7
|
+
|
8
|
+
module ChromosomeRange
|
9
|
+
extend Entity
|
10
|
+
|
11
|
+
self.annotation :organism
|
12
|
+
|
13
|
+
self.format = "Chromosome Range"
|
14
|
+
|
15
|
+
|
16
|
+
def self.text_to_unit(text)
|
17
|
+
text = text.sub('^', '+')
|
18
|
+
base = text.to_f
|
19
|
+
case
|
20
|
+
when text =~ /KB?$/
|
21
|
+
base * 1000
|
22
|
+
when text =~ /MB?$/
|
23
|
+
base * 1000_000
|
24
|
+
when text =~ /^\d+(\.\d+)?(e\+\d+)?$/
|
25
|
+
base
|
26
|
+
else
|
27
|
+
raise "Text format not understood: #{ text }"
|
28
|
+
end.to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
property :unit => :array2single do
|
32
|
+
self.collect{|range|
|
33
|
+
chr, start, eend = range.split(":")
|
34
|
+
[chr, ChromosomeRange.text_to_unit(start), ChromosomeRange.text_to_unit(eend)] * ":"
|
35
|
+
}
|
36
|
+
end
|
37
|
+
persist :unit
|
38
|
+
|
39
|
+
property :genes => :array2single do
|
40
|
+
Sequence.job(:genes_at_genomic_ranges, "ChromosomeRange", :organism => organism, :ranges => self.unit).run.tap{|t| t.namespace = organism}.values_at *self.unit
|
41
|
+
end
|
42
|
+
|
43
|
+
property :ensembl_browser => :single2array do
|
44
|
+
"http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{start}-#{eend}"
|
45
|
+
end
|
46
|
+
|
47
|
+
property :chromosome => :array2single do
|
48
|
+
self.clean_annotations.collect{|r| r.split(":")[0]}
|
49
|
+
end
|
50
|
+
persist :_ary_chromosome
|
51
|
+
|
52
|
+
property :start => :array2single do
|
53
|
+
self.clean_annotations.collect{|r| r.split(":")[1].to_i}
|
54
|
+
end
|
55
|
+
persist :_ary_start
|
56
|
+
|
57
|
+
property :eend => :array2single do
|
58
|
+
self.clean_annotations.collect{|r| r.split(":")[2].to_i}
|
59
|
+
end
|
60
|
+
persist :_ary_eend
|
61
|
+
|
62
|
+
property :end => :array2single do
|
63
|
+
self.eend
|
64
|
+
end
|
65
|
+
persist :_ary_end
|
66
|
+
|
67
|
+
property :range => :array2single do
|
68
|
+
start.zip(self.end).collect{|s,e| (s..e)}
|
69
|
+
end
|
70
|
+
persist :_ary_range
|
71
|
+
|
72
|
+
|
73
|
+
end
|
data/lib/rbbt/entity/cnv.rb
CHANGED
@@ -26,12 +26,30 @@ module CNV
|
|
26
26
|
|
27
27
|
property :genes => :array2single do
|
28
28
|
@genes ||= begin
|
29
|
-
genes = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self).run
|
30
|
-
genes.unnamed = true
|
29
|
+
genes = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self, :unnamed => true).run
|
31
30
|
genes = genes.values_at *self
|
32
31
|
Gene.setup(genes, "Ensembl Gene ID", organism)
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
35
|
+
property :chromosome => :array2single do
|
36
|
+
self.clean_annotations.collect{|mut| mut.split(":")[0]}
|
37
|
+
end
|
38
|
+
persist :_ary_chromosome
|
39
|
+
|
40
|
+
property :start => :array2single do
|
41
|
+
self.clean_annotations.collect{|mut| mut.split(":")[1].to_i}
|
42
|
+
end
|
43
|
+
persist :_ary_start
|
44
|
+
|
45
|
+
property :end => :array2single do
|
46
|
+
self.clean_annotations.collect{|mut| mut.split(":")[2].to_i}
|
47
|
+
end
|
48
|
+
persist :_ary_end
|
49
|
+
|
50
|
+
property :range => :array2single do
|
51
|
+
start.zip(self.end).collect{|s,e| (s..e)}
|
52
|
+
end
|
53
|
+
|
36
54
|
end
|
37
55
|
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -7,6 +7,7 @@ require 'rbbt/sources/cancer'
|
|
7
7
|
require 'rbbt/entity/protein'
|
8
8
|
require 'rbbt/entity/pmid'
|
9
9
|
require 'rbbt/entity/transcript'
|
10
|
+
require 'rbbt/bow/bow'
|
10
11
|
|
11
12
|
Workflow.require_workflow "Translation"
|
12
13
|
|
@@ -15,7 +16,7 @@ module Gene
|
|
15
16
|
|
16
17
|
def self.ensg2enst(organism, gene)
|
17
18
|
@@ensg2enst ||= {}
|
18
|
-
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true
|
19
|
+
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true, :unnamed => true)
|
19
20
|
|
20
21
|
if Array === gene
|
21
22
|
@@ensg2enst[organism].values_at *gene
|
@@ -24,7 +25,6 @@ module Gene
|
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
27
|
-
|
28
28
|
def self.filter(query, field = nil, options = nil, entity = nil)
|
29
29
|
return true if query == entity
|
30
30
|
|
@@ -33,39 +33,86 @@ module Gene
|
|
33
33
|
false
|
34
34
|
end
|
35
35
|
|
36
|
+
def self.gene_list_bases(genes)
|
37
|
+
genes = genes.ensembl
|
38
|
+
chromosome_genes = {}
|
39
|
+
Misc.process_to_hash(genes){|genes| genes.chromosome}.each{|gene, chr| chromosome_genes[chr] ||= []; chromosome_genes[chr] << gene}
|
40
|
+
total = 0
|
41
|
+
chromosome_genes.each do |chr,gs|
|
42
|
+
next if chr.nil?
|
43
|
+
total += Misc.total_length(genes.annotate(gs).chr_range.compact)
|
44
|
+
end
|
45
|
+
|
46
|
+
total
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.gene_list_exon_bases(genes)
|
50
|
+
genes = genes.ensembl
|
51
|
+
chromosome_genes = {}
|
52
|
+
Misc.process_to_hash(genes){|genes| genes.chromosome}.each{|gene, chr| chromosome_genes[chr] ||= []; chromosome_genes[chr] << gene}
|
53
|
+
|
54
|
+
@@exon_range_tsv ||= {}
|
55
|
+
organism = genes.organism
|
56
|
+
@@exon_range_tsv[organism] ||= Organism.exons(organism).tsv :persist => true, :fields => ["Exon Chr Start", "Exon Chr End"], :type => :list, :cast => :to_i, :unnamed => true
|
57
|
+
total = 0
|
58
|
+
|
59
|
+
chromosome_genes.each do |chr,gs|
|
60
|
+
next if chr.nil?
|
61
|
+
exons = genes.annotate(gs).transcripts.compact.flatten.exons.compact.flatten.uniq
|
62
|
+
|
63
|
+
exon_ranges = exons.collect{|exon|
|
64
|
+
Log.low "Exon #{ exon } does not have range" unless @@exon_range_tsv[organism].include? exon
|
65
|
+
next unless @@exon_range_tsv[organism].include? exon
|
66
|
+
pos = @@exon_range_tsv[organism][exon]
|
67
|
+
(pos.first..pos.last)
|
68
|
+
}.compact
|
69
|
+
total += Misc.total_length(exon_ranges)
|
70
|
+
end
|
71
|
+
|
72
|
+
total
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
36
77
|
self.annotation :format
|
37
78
|
self.annotation :organism
|
38
79
|
|
39
|
-
self.format = Organism
|
80
|
+
self.format = Organism.identifiers("Hsa").all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
40
81
|
|
41
82
|
property :ortholog => :array2single do |other|
|
42
83
|
return self if organism =~ /^#{ other }(?!\w)/
|
43
84
|
new_organism = organism.split(":")
|
44
85
|
new_organism[0] = other
|
45
86
|
new_organism = new_organism * "/"
|
46
|
-
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
87
|
+
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true, :unnamed => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
47
88
|
end
|
48
89
|
persist :ortholog
|
49
90
|
|
50
91
|
property :to => :array2single do |new_format|
|
51
92
|
return self if format == new_format
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
property :__to => :array2single do |new_format|
|
56
|
-
return self if format == new_format
|
57
|
-
to!(new_format).collect!{|v| Array === v ? v.first : v}
|
93
|
+
genes = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self)
|
94
|
+
Gene.setup(genes, new_format, organism)
|
95
|
+
genes
|
58
96
|
end
|
59
97
|
|
60
98
|
property :strand => :array2single do
|
61
|
-
|
99
|
+
@@strand_tsv ||= {}
|
100
|
+
@@strand_tsv[organism] ||= Organism.gene_positions(organism).tsv(:fields => ["Strand"], :type => :single, :persist => true, :unnamed => true)
|
101
|
+
to("Ensembl Gene ID").collect do |gene|
|
102
|
+
@@strand_tsv[organism][gene]
|
103
|
+
end
|
62
104
|
end
|
63
|
-
persist :
|
105
|
+
persist :_ary_strand
|
64
106
|
|
65
107
|
property :ensembl => :array2single do
|
66
108
|
to "Ensembl Gene ID"
|
67
109
|
end
|
68
110
|
|
111
|
+
property :biotype => :array2single do
|
112
|
+
Organism.gene_biotype(organism).tsv(:persist => true, :type => :single, :unnamed => true).values_at *self.ensembl
|
113
|
+
end
|
114
|
+
persist :biotype
|
115
|
+
|
69
116
|
property :entrez => :array2single do
|
70
117
|
to "Entrez Gene ID"
|
71
118
|
end
|
@@ -73,26 +120,25 @@ module Gene
|
|
73
120
|
property :uniprot => :array2single do
|
74
121
|
to "UniProt/SwissProt Accession"
|
75
122
|
end
|
76
|
-
persist :uniprot
|
77
123
|
|
78
124
|
property :name => :array2single do
|
125
|
+
return self if self.format == "Associated Gene Name"
|
79
126
|
to "Associated Gene Name"
|
80
127
|
end
|
81
|
-
persist :name
|
82
128
|
|
83
129
|
property :chr_start => :array2single do
|
84
|
-
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
130
|
+
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"], :unnamed => true).values_at *self
|
85
131
|
end
|
86
132
|
persist :chr_start
|
87
133
|
|
88
134
|
property :go_bp_terms => :array2single do
|
89
|
-
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
135
|
+
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :unnamed => true).values_at *self.ensembl
|
90
136
|
end
|
91
137
|
persist :go_bp_terms
|
92
138
|
|
93
139
|
property :long_name => :array2single do
|
94
140
|
entre = self.entrez
|
95
|
-
gene = Entrez.get_gene(entrez).values_at(*entrez).collect{|gene| gene.nil? ? nil : gene.description.flatten.first}
|
141
|
+
gene = Entrez.get_gene(entrez).values_at(*entrez).collect{|gene| gene.nil? ? nil : (gene.description || []).flatten.first}
|
96
142
|
end
|
97
143
|
persist :long_name
|
98
144
|
|
@@ -140,26 +186,26 @@ module Gene
|
|
140
186
|
persist :max_protein_length
|
141
187
|
|
142
188
|
property :chromosome => :array2single do
|
143
|
-
|
144
|
-
|
189
|
+
@@chromosome_tsv ||= {}
|
190
|
+
@@chromosome_tsv[organism] ||= Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true, :unnamed => true
|
145
191
|
if Array === self
|
146
192
|
to("Ensembl Gene ID").collect do |gene|
|
147
|
-
|
193
|
+
@@chromosome_tsv[organism][gene]
|
148
194
|
end
|
149
195
|
else
|
150
|
-
|
196
|
+
@@chromosome_tsv[organism][to("Ensembl Gene ID")]
|
151
197
|
end
|
152
198
|
end
|
153
199
|
persist :chromosome
|
154
200
|
|
155
|
-
property :
|
156
|
-
|
201
|
+
property :chr_range => :array2single do
|
202
|
+
chr_range_index ||= Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i, :unnamed => true
|
157
203
|
to("Ensembl Gene ID").collect do |gene|
|
158
|
-
next if not
|
159
|
-
Range.new *
|
204
|
+
next if not chr_range_index.include? gene
|
205
|
+
Range.new *chr_range_index[gene]
|
160
206
|
end
|
161
207
|
end
|
162
|
-
persist :
|
208
|
+
persist :chr_range
|
163
209
|
|
164
210
|
property :articles => :array2single do
|
165
211
|
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat, :unnamed => true).values_at *self.entrez)
|
@@ -167,14 +213,14 @@ module Gene
|
|
167
213
|
persist :articles
|
168
214
|
|
169
215
|
property :sequence => :array2single do
|
170
|
-
|
171
|
-
|
172
|
-
|
216
|
+
@@sequence_tsv ||= {}
|
217
|
+
@@sequence_tsv[organism] ||= Organism.gene_sequence(organism).tsv :persist => true, :unnamed => true
|
218
|
+
@@sequence_tsv[organism].values_at *self.ensembl
|
173
219
|
end
|
174
220
|
persist :sequence
|
175
221
|
|
176
222
|
property :matador_drugs => :array2single do
|
177
|
-
@@matador ||= Matador.protein_drug.tsv(:persist => false
|
223
|
+
@@matador ||= Matador.protein_drug.tsv(:persist => false, :unnamed => true)
|
178
224
|
|
179
225
|
ensg = self.to("Ensembl Gene ID")
|
180
226
|
|
@@ -225,7 +271,7 @@ module Gene
|
|
225
271
|
persist :pathway_drugs
|
226
272
|
|
227
273
|
property :related_cancers => :array2single do
|
228
|
-
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")}
|
274
|
+
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : (v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")).uniq}
|
229
275
|
end
|
230
276
|
persist :related_cancers
|
231
277
|
|
@@ -234,7 +280,7 @@ module Gene
|
|
234
280
|
raise "No organism defined" if self.organism.nil?
|
235
281
|
clean_organism = self.organism.sub(/\/.*/,'') + '/jun2011'
|
236
282
|
names.organism = clean_organism
|
237
|
-
ranges = names.chromosome.zip(name.
|
283
|
+
ranges = names.chromosome.zip(name.chr_range).collect do |chromosome, range|
|
238
284
|
next if range.nil?
|
239
285
|
[chromosome, range.begin, range.end] * ":"
|
240
286
|
end
|
@@ -243,62 +289,89 @@ module Gene
|
|
243
289
|
persist :somatic_snvs
|
244
290
|
|
245
291
|
|
292
|
+
property :literature_score do |terms|
|
293
|
+
terms = terms.collect{|t| t.stem}
|
294
|
+
articles = self.articles
|
295
|
+
if articles.nil? or articles.empty?
|
296
|
+
0
|
297
|
+
else
|
298
|
+
articles.inject(0){|acc,article| acc += article.text.words.select{|word| terms.include? word}.length }.to_f / articles.length
|
299
|
+
end
|
300
|
+
end
|
301
|
+
persist :literature_score
|
302
|
+
|
303
|
+
|
246
304
|
property :ihop_interactions => :single do
|
247
305
|
uniprot = self.uniprot
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
end
|
306
|
+
if uniprot.nil?
|
307
|
+
nil
|
308
|
+
else
|
309
|
+
sentences = []
|
253
310
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
}.compact.flatten.each do |evidence|
|
260
|
-
symbol = evidence.attr('symbol')
|
261
|
-
taxid = evidence.attr('ncbiTaxId')
|
262
|
-
|
263
|
-
if Organism.entrez_taxids(self.organism).list.include? taxid
|
264
|
-
interactors << symbol
|
265
|
-
end
|
311
|
+
begin
|
312
|
+
url = "http://ws.bioinfo.cnio.es/iHOP/cgi-bin/getSymbolInteractions?ncbiTaxId=9606&reference=#{uniprot}&namespace=UNIPROT__AC"
|
313
|
+
doc = Nokogiri::XML(Open.read(url))
|
314
|
+
sentences = doc.css("iHOPsentence")
|
315
|
+
rescue
|
266
316
|
end
|
267
|
-
end
|
268
|
-
|
269
|
-
Gene.setup(interactors, "Associated Gene Name", self.organism).organism
|
270
|
-
|
271
|
-
interactors_ensembl = interactors.ensembl
|
272
317
|
|
273
|
-
|
274
|
-
interactors.collect{|i| i}.zip(interactors_ensembl.collect{|i| i}).each do |o,e|
|
275
|
-
interactors2ensembl[o] = e
|
318
|
+
sentences
|
276
319
|
end
|
320
|
+
end
|
277
321
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
322
|
+
property :tagged_ihop_interactions => :single do
|
323
|
+
interactors = []
|
324
|
+
ihop_interactions = self.ihop_interactions
|
325
|
+
if ihop_interactions.nil?
|
326
|
+
nil
|
327
|
+
else
|
328
|
+
ihop_interactions.each do |sentence|
|
329
|
+
sentence.css('iHOPatom').collect{|atom|
|
330
|
+
atom.css('evidence');
|
331
|
+
}.compact.flatten.each do |evidence|
|
282
332
|
symbol = evidence.attr('symbol')
|
283
333
|
taxid = evidence.attr('ncbiTaxId')
|
284
334
|
|
285
335
|
if Organism.entrez_taxids(self.organism).list.include? taxid
|
286
|
-
symbol
|
287
|
-
else
|
288
|
-
nil
|
336
|
+
interactors << symbol
|
289
337
|
end
|
290
|
-
end
|
338
|
+
end
|
339
|
+
end
|
291
340
|
|
292
|
-
|
341
|
+
Gene.setup(interactors, "Associated Gene Name", self.organism).organism
|
342
|
+
|
343
|
+
interactors_ensembl = interactors.ensembl
|
293
344
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
345
|
+
interactors2ensembl = {}
|
346
|
+
interactors.collect{|i| i}.zip(interactors_ensembl.collect{|i| i}).each do |o,e|
|
347
|
+
interactors2ensembl[o] = e
|
348
|
+
end
|
349
|
+
|
350
|
+
ihop_interactions.collect do |sentence|
|
351
|
+
sentence.css('iHOPatom').each{|atom|
|
352
|
+
literal = atom.content()
|
353
|
+
evidences = atom.css('evidence')
|
354
|
+
symbol = evidences.collect do |evidence|
|
355
|
+
symbol = evidence.attr('symbol')
|
356
|
+
taxid = evidence.attr('ncbiTaxId')
|
357
|
+
|
358
|
+
if Organism.entrez_taxids(self.organism).list.include? taxid
|
359
|
+
symbol
|
360
|
+
else
|
361
|
+
nil
|
362
|
+
end
|
363
|
+
end.compact.first
|
364
|
+
|
365
|
+
evidences.remove
|
366
|
+
|
367
|
+
if interactors2ensembl.include? symbol and not interactors2ensembl[symbol].nil?
|
368
|
+
atom.children.remove
|
369
|
+
interactor = interactors2ensembl[symbol]
|
370
|
+
atom.replace interactor.respond_to?(:link)? interactor.link(nil, nil, :html_link_extra_attrs => "title='#{literal}'") : interactor.name
|
371
|
+
end
|
372
|
+
}
|
373
|
+
sentence.to_s
|
374
|
+
end
|
301
375
|
end
|
302
376
|
end
|
303
377
|
end
|
304
|
-
|