rbbt-entities 1.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/icgc2rbbt.rb +23 -0
- data/bin/vcf2rbbt.rb +15 -0
- data/lib/rbbt/entity/chromosome_range.rb +73 -0
- data/lib/rbbt/entity/cnv.rb +20 -2
- data/lib/rbbt/entity/gene.rb +147 -74
- data/lib/rbbt/entity/genomic_mutation.rb +380 -50
- data/lib/rbbt/entity/genotype.rb +10 -4
- data/lib/rbbt/entity/interactor.rb +6 -0
- data/lib/rbbt/entity/mutated_isoform.rb +171 -83
- data/lib/rbbt/entity/pmid.rb +33 -6
- data/lib/rbbt/entity/protein.rb +36 -7
- data/lib/rbbt/entity/transcript.rb +20 -4
- data/lib/rbbt/entity.rb +123 -68
- data/test/rbbt/entity/test_gene.rb +16 -2
- data/test/rbbt/entity/test_genomic_mutation.rb +53 -1
- data/test/rbbt/entity/test_pmid.rb +19 -0
- data/test/rbbt/test_entity.rb +100 -5
- metadata +51 -72
data/bin/icgc2rbbt.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
|
4
|
+
# Convert an ICGC tab-separated mutation dump into one genotype file per sample.
#
# Usage: icgc2rbbt.rb <icgc_file> <output_directory>
#
# Every data row adds a "chromosome:position:allele" entry to the sample named
# in that row; each sample's unique entries are then written, one per line, to
# a file named after the sample inside <output_directory>.
file = ARGV.shift
directory = ARGV.shift
abort "Usage: #{File.basename($0)} <icgc_file> <output_directory>" if file.nil? || directory.nil?

# sample name => list of "chr:pos:allele" strings
genotypes = Hash.new { |hash, sample| hash[sample] = [] }

Open.read(file).split("\n").each do |line|
  # Skip the header row and blank lines.
  next if line =~ /^Cancer Type/ || line.strip.empty?

  # 0-based columns 2, 3, 10 and 35 are read as chromosome, position, mutated
  # allele and sample.
  chr, pos, mut, sample = line.split(/\t/).values_at 2, 3, 10, 35

  # A truncated row lacks some of these columns and cannot be converted;
  # report it instead of crashing on nil.
  if [chr, pos, mut, sample].any? { |field| field.nil? }
    warn "Skipping malformed line: #{line}"
    next
  end

  # Only a leading "chr" prefix is dropped, so names that merely contain
  # "chr" further in are left intact.
  chr = chr.sub(/\Achr/, '')

  # An allele written as "-" followed by bases becomes one dash per base.
  mut = '-' * (mut.length - 1) if mut =~ /^-[ACGT]/

  genotypes[sample] << [chr, pos, mut] * ":"
end

genotypes.each do |sample, mutations|
  Open.write(File.join(directory, sample), mutations.uniq * "\n")
end
|
data/bin/vcf2rbbt.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
|
4
|
+
# Print the variants of a VCF file as "chromosome:position:allele:score" lines.
#
# Usage: vcf2rbbt.rb <vcf_file>
#
# The first six tab-separated columns of every non-comment line are read as
# chromosome, position, id, reference allele, alternative allele and score.
file = ARGV.shift
abort "Usage: #{File.basename($0)} <vcf_file>" if file.nil?

Open.read(file).split("\n").each do |line|
  # Skip header/meta lines and blank lines.
  next if line =~ /^#/ || line.strip.empty?

  chr, pos, _id, ref, mut, score = line.split(/\t/)

  # Rows without both alleles cannot be converted; report them instead of
  # crashing on nil.
  if ref.nil? || mut.nil?
    warn "Skipping malformed line: #{line}"
    next
  end

  # Only a leading "chr" prefix is dropped.
  chr = chr.sub(/\Achr/, '')

  # When the alternative allele is shorter than the reference, pad it with one
  # dash per missing base.
  mut = mut + '-' * (ref.length - mut.length) if ref.length > mut.length

  puts [chr, pos, mut, score] * ":"
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/sources/organism'
|
4
|
+
require 'rbbt/entity/gene'
|
5
|
+
|
6
|
+
Workflow.require_workflow "Sequence"
|
7
|
+
|
8
|
+
module ChromosomeRange
|
9
|
+
extend Entity
|
10
|
+
|
11
|
+
self.annotation :organism
|
12
|
+
|
13
|
+
self.format = "Chromosome Range"
|
14
|
+
|
15
|
+
|
16
|
+
# Convert a textual genomic coordinate into an integer number of bases.
#
# Accepted forms, after surrounding whitespace is removed:
# * a number followed by "K"/"KB" (multiplied by 1,000) or "M"/"MB"
#   (multiplied by 1,000,000); suffixes are matched case-insensitively
# * a plain number, optionally with a fraction and an "e+N" exponent
#
# @param text [String] the coordinate as typed, e.g. "1.5M", "20KB", "1e+6"
# @return [Integer] the coordinate expressed in bases
# @raise [RuntimeError] when the text matches none of the accepted forms
def self.text_to_unit(text)
  # The first "^" is replaced by "+"; presumably this recovers exponents such
  # as "1e+6" whose "+" was lost upstream -- TODO confirm with callers.
  text = text.strip.sub('^', '+')

  factor =
    case text
    when /KB?\z/i then 1_000
    when /MB?\z/i then 1_000_000
    when /\A\d+(\.\d+)?(e\+\d+)?\z/i then 1
    else raise "Text format not understood: #{ text }"
    end

  # Scaling a binary float can land just below the intended integer
  # (1.005 * 1000 => 1004.9999999999999). Rounding away that noise before
  # truncating keeps "1.005K" at 1005 while real fractions of a base are
  # still truncated as before.
  (text.to_f * factor).round(6).to_i
end
|
30
|
+
|
31
|
+
# Each range rewritten as "chr:start:end" with both coordinates converted to
# integer base counts by ChromosomeRange.text_to_unit.
property :unit => :array2single do
  self.map do |range|
    chr, first, last = range.split(":")
    [chr, ChromosomeRange.text_to_unit(first), ChromosomeRange.text_to_unit(last)].join(":")
  end
end
persist :unit

# Results of the Sequence workflow's :genes_at_genomic_ranges job, one entry
# per range, looked up by the unit form of each range.
property :genes => :array2single do
  job = Sequence.job(:genes_at_genomic_ranges, "ChromosomeRange", :organism => organism, :ranges => self.unit)
  result = job.run
  result.namespace = organism
  result.values_at(*self.unit)
end

# URL of the Ensembl location view for this range. The species path segment
# is fixed to "Homo_sapiens" regardless of the organism annotation.
property :ensembl_browser => :single2array do
  "http://#{Misc.ensembl_server(self.organism)}/Homo_sapiens/Location/View?db=core&r=#{chromosome}:#{start}-#{eend}"
end

# First ":"-separated field of each range.
property :chromosome => :array2single do
  clean_annotations.map { |range| range.split(":").first }
end
persist :_ary_chromosome

# Second ":"-separated field of each range, as an integer.
property :start => :array2single do
  clean_annotations.map { |range| range.split(":")[1].to_i }
end
persist :_ary_start

# Third ":"-separated field of each range, as an integer.
property :eend => :array2single do
  clean_annotations.map { |range| range.split(":")[2].to_i }
end
persist :_ary_eend

# Same values as :eend, exposed under the name :end.
property :end => :array2single do
  eend
end
persist :_ary_end

# Ruby Range from start to end for each entry.
property :range => :array2single do
  start.zip(self.end).map { |first, last| (first..last) }
end
persist :_ary_range
|
71
|
+
|
72
|
+
|
73
|
+
end
|
data/lib/rbbt/entity/cnv.rb
CHANGED
@@ -26,12 +26,30 @@ module CNV
|
|
26
26
|
|
27
27
|
property :genes => :array2single do
|
28
28
|
@genes ||= begin
|
29
|
-
genes = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self).run
|
30
|
-
genes.unnamed = true
|
29
|
+
genes = Sequence.job(:genes_at_genomic_ranges, jobname, :organism => organism, :ranges => self, :unnamed => true).run
|
31
30
|
genes = genes.values_at *self
|
32
31
|
Gene.setup(genes, "Ensembl Gene ID", organism)
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
35
|
+
# First ":"-separated field of each CNV.
property :chromosome => :array2single do
  clean_annotations.map { |cnv| cnv.split(":").first }
end
persist :_ary_chromosome

# Second ":"-separated field of each CNV, as an integer.
property :start => :array2single do
  clean_annotations.map { |cnv| cnv.split(":")[1].to_i }
end
persist :_ary_start

# Third ":"-separated field of each CNV, as an integer.
property :end => :array2single do
  clean_annotations.map { |cnv| cnv.split(":")[2].to_i }
end
persist :_ary_end

# Ruby Range from start to end for each CNV.
property :range => :array2single do
  start.zip(self.end).map { |first, last| (first..last) }
end
|
53
|
+
|
36
54
|
end
|
37
55
|
|
data/lib/rbbt/entity/gene.rb
CHANGED
@@ -7,6 +7,7 @@ require 'rbbt/sources/cancer'
|
|
7
7
|
require 'rbbt/entity/protein'
|
8
8
|
require 'rbbt/entity/pmid'
|
9
9
|
require 'rbbt/entity/transcript'
|
10
|
+
require 'rbbt/bow/bow'
|
10
11
|
|
11
12
|
Workflow.require_workflow "Translation"
|
12
13
|
|
@@ -15,7 +16,7 @@ module Gene
|
|
15
16
|
|
16
17
|
def self.ensg2enst(organism, gene)
|
17
18
|
@@ensg2enst ||= {}
|
18
|
-
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true
|
19
|
+
@@ensg2enst[organism] ||= Organism.gene_transcripts(organism).tsv(:type => :flat, :key_field => "Ensembl Gene ID", :fields => ["Ensembl Transcript ID"], :persist => true, :unnamed => true)
|
19
20
|
|
20
21
|
if Array === gene
|
21
22
|
@@ensg2enst[organism].values_at *gene
|
@@ -24,7 +25,6 @@ module Gene
|
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
27
|
-
|
28
28
|
def self.filter(query, field = nil, options = nil, entity = nil)
|
29
29
|
return true if query == entity
|
30
30
|
|
@@ -33,39 +33,86 @@ module Gene
|
|
33
33
|
false
|
34
34
|
end
|
35
35
|
|
36
|
+
# Sum, over chromosomes, of Misc.total_length applied to the chr_range values
# of the genes located on that chromosome.
#
# Genes are first translated with #ensembl. Genes whose chromosome is nil are
# left out, and nil ranges are dropped before measuring.
#
# @param genes [Gene] annotated gene list; must respond to #ensembl, and the
#   translated list to #chromosome, #annotate and #chr_range
# @return [Numeric] accumulated length; 0 for an empty list
def self.gene_list_bases(genes)
  genes = genes.ensembl

  # chromosome name => genes on that chromosome. The block parameter is named
  # differently from the outer local so it can neither shadow nor overwrite it.
  chromosome_genes = Hash.new { |hash, chr| hash[chr] = [] }
  Misc.process_to_hash(genes) { |list| list.chromosome }.each do |gene, chr|
    chromosome_genes[chr] << gene
  end

  chromosome_genes.inject(0) do |total, (chr, chr_genes)|
    next total if chr.nil?

    total + Misc.total_length(genes.annotate(chr_genes).chr_range.compact)
  end
end
|
48
|
+
|
49
|
+
# Sum, over chromosomes, of Misc.total_length applied to the ranges of the
# exons belonging to the transcripts of the genes on that chromosome.
#
# Exon start/end positions come from Organism.exons(organism), loaded once per
# organism and kept in @@exon_range_tsv. Exons missing from that table are
# logged and left out; genes whose chromosome is nil are left out as well.
#
# @param genes [Gene] annotated gene list; must respond to #ensembl, and the
#   translated list to #organism, #chromosome, #annotate and #transcripts
# @return [Numeric] accumulated length; 0 for an empty list
def self.gene_list_exon_bases(genes)
  genes = genes.ensembl

  # chromosome name => genes on that chromosome. The block parameter is named
  # differently from the outer local so it can neither shadow nor overwrite it.
  chromosome_genes = Hash.new { |hash, chr| hash[chr] = [] }
  Misc.process_to_hash(genes) { |list| list.chromosome }.each do |gene, chr|
    chromosome_genes[chr] << gene
  end

  @@exon_range_tsv ||= {}
  organism = genes.organism
  @@exon_range_tsv[organism] ||= Organism.exons(organism).tsv :persist => true, :fields => ["Exon Chr Start", "Exon Chr End"], :type => :list, :cast => :to_i, :unnamed => true
  exon_positions = @@exon_range_tsv[organism]

  chromosome_genes.inject(0) do |total, (chr, chr_genes)|
    next total if chr.nil?

    exons = genes.annotate(chr_genes).transcripts.compact.flatten.exons.compact.flatten.uniq

    exon_ranges = exons.collect do |exon|
      # One membership test decides both the log message and the skip.
      unless exon_positions.include? exon
        Log.low "Exon #{ exon } does not have range"
        next
      end

      pos = exon_positions[exon]
      (pos.first..pos.last)
    end.compact

    total + Misc.total_length(exon_ranges)
  end
end
|
74
|
+
|
75
|
+
|
76
|
+
|
36
77
|
self.annotation :format
|
37
78
|
self.annotation :organism
|
38
79
|
|
39
|
-
self.format = Organism
|
80
|
+
self.format = Organism.identifiers("Hsa").all_fields - ["Ensembl Protein ID", "Ensembl Transcript ID"]
|
40
81
|
|
41
82
|
property :ortholog => :array2single do |other|
|
42
83
|
return self if organism =~ /^#{ other }(?!\w)/
|
43
84
|
new_organism = organism.split(":")
|
44
85
|
new_organism[0] = other
|
45
86
|
new_organism = new_organism * "/"
|
46
|
-
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
87
|
+
Gene.setup(Organism[organism]["ortholog_#{other}"].tsv(:persist => true, :unnamed => true).values_at(*self.ensembl).collect{|l| l.first}, "Ensembl Gene ID", new_organism)
|
47
88
|
end
|
48
89
|
persist :ortholog
|
49
90
|
|
50
91
|
property :to => :array2single do |new_format|
|
51
92
|
return self if format == new_format
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
property :__to => :array2single do |new_format|
|
56
|
-
return self if format == new_format
|
57
|
-
to!(new_format).collect!{|v| Array === v ? v.first : v}
|
93
|
+
genes = Translation.job(:tsv_translate, "", :organism => organism, :genes => self, :format => new_format).exec.values_at(*self)
|
94
|
+
Gene.setup(genes, new_format, organism)
|
95
|
+
genes
|
58
96
|
end
|
59
97
|
|
60
98
|
property :strand => :array2single do
|
61
|
-
|
99
|
+
@@strand_tsv ||= {}
|
100
|
+
@@strand_tsv[organism] ||= Organism.gene_positions(organism).tsv(:fields => ["Strand"], :type => :single, :persist => true, :unnamed => true)
|
101
|
+
to("Ensembl Gene ID").collect do |gene|
|
102
|
+
@@strand_tsv[organism][gene]
|
103
|
+
end
|
62
104
|
end
|
63
|
-
persist :
|
105
|
+
persist :_ary_strand
|
64
106
|
|
65
107
|
property :ensembl => :array2single do
|
66
108
|
to "Ensembl Gene ID"
|
67
109
|
end
|
68
110
|
|
111
|
+
property :biotype => :array2single do
|
112
|
+
Organism.gene_biotype(organism).tsv(:persist => true, :type => :single, :unnamed => true).values_at *self.ensembl
|
113
|
+
end
|
114
|
+
persist :biotype
|
115
|
+
|
69
116
|
property :entrez => :array2single do
|
70
117
|
to "Entrez Gene ID"
|
71
118
|
end
|
@@ -73,26 +120,25 @@ module Gene
|
|
73
120
|
property :uniprot => :array2single do
|
74
121
|
to "UniProt/SwissProt Accession"
|
75
122
|
end
|
76
|
-
persist :uniprot
|
77
123
|
|
78
124
|
property :name => :array2single do
|
125
|
+
return self if self.format == "Associated Gene Name"
|
79
126
|
to "Associated Gene Name"
|
80
127
|
end
|
81
|
-
persist :name
|
82
128
|
|
83
129
|
property :chr_start => :array2single do
|
84
|
-
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"]).values_at *self
|
130
|
+
Organism.gene_positions(organism).tsv(:persist => true, :type => :single, :cast => :to_i, :fields => ["Gene Start"], :unnamed => true).values_at *self
|
85
131
|
end
|
86
132
|
persist :chr_start
|
87
133
|
|
88
134
|
property :go_bp_terms => :array2single do
|
89
|
-
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat).values_at *self.ensembl
|
135
|
+
Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :unnamed => true).values_at *self.ensembl
|
90
136
|
end
|
91
137
|
persist :go_bp_terms
|
92
138
|
|
93
139
|
property :long_name => :array2single do
|
94
140
|
entre = self.entrez
|
95
|
-
gene = Entrez.get_gene(entrez).values_at(*entrez).collect{|gene| gene.nil? ? nil : gene.description.flatten.first}
|
141
|
+
gene = Entrez.get_gene(entrez).values_at(*entrez).collect{|gene| gene.nil? ? nil : (gene.description || []).flatten.first}
|
96
142
|
end
|
97
143
|
persist :long_name
|
98
144
|
|
@@ -140,26 +186,26 @@ module Gene
|
|
140
186
|
persist :max_protein_length
|
141
187
|
|
142
188
|
property :chromosome => :array2single do
|
143
|
-
|
144
|
-
|
189
|
+
@@chromosome_tsv ||= {}
|
190
|
+
@@chromosome_tsv[organism] ||= Organism.gene_positions(organism).tsv :fields => ["Chromosome Name"], :type => :single, :persist => true, :unnamed => true
|
145
191
|
if Array === self
|
146
192
|
to("Ensembl Gene ID").collect do |gene|
|
147
|
-
|
193
|
+
@@chromosome_tsv[organism][gene]
|
148
194
|
end
|
149
195
|
else
|
150
|
-
|
196
|
+
@@chromosome_tsv[organism][to("Ensembl Gene ID")]
|
151
197
|
end
|
152
198
|
end
|
153
199
|
persist :chromosome
|
154
200
|
|
155
|
-
property :
|
156
|
-
|
201
|
+
property :chr_range => :array2single do
|
202
|
+
chr_range_index ||= Organism.gene_positions(organism).tsv :fields => ["Gene Start", "Gene End"], :type => :list, :persist => true, :cast => :to_i, :unnamed => true
|
157
203
|
to("Ensembl Gene ID").collect do |gene|
|
158
|
-
next if not
|
159
|
-
Range.new *
|
204
|
+
next if not chr_range_index.include? gene
|
205
|
+
Range.new *chr_range_index[gene]
|
160
206
|
end
|
161
207
|
end
|
162
|
-
persist :
|
208
|
+
persist :chr_range
|
163
209
|
|
164
210
|
property :articles => :array2single do
|
165
211
|
PMID.setup(Organism.gene_pmids(organism).tsv(:persist => true, :fields => ["PMID"], :type => :flat, :unnamed => true).values_at *self.entrez)
|
@@ -167,14 +213,14 @@ module Gene
|
|
167
213
|
persist :articles
|
168
214
|
|
169
215
|
property :sequence => :array2single do
|
170
|
-
|
171
|
-
|
172
|
-
|
216
|
+
@@sequence_tsv ||= {}
|
217
|
+
@@sequence_tsv[organism] ||= Organism.gene_sequence(organism).tsv :persist => true, :unnamed => true
|
218
|
+
@@sequence_tsv[organism].values_at *self.ensembl
|
173
219
|
end
|
174
220
|
persist :sequence
|
175
221
|
|
176
222
|
property :matador_drugs => :array2single do
|
177
|
-
@@matador ||= Matador.protein_drug.tsv(:persist => false
|
223
|
+
@@matador ||= Matador.protein_drug.tsv(:persist => false, :unnamed => true)
|
178
224
|
|
179
225
|
ensg = self.to("Ensembl Gene ID")
|
180
226
|
|
@@ -225,7 +271,7 @@ module Gene
|
|
225
271
|
persist :pathway_drugs
|
226
272
|
|
227
273
|
property :related_cancers => :array2single do
|
228
|
-
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")}
|
274
|
+
Cancer["cancer_genes.tsv"].tsv(:persist => true, :type => :list).values_at(*self.name).collect{|v| v.nil? ? nil : (v["Tumour Types (Somatic Mutations)"].split(", ") + v["Tumour Types (Germline Mutations)"].split(", ")).uniq}
|
229
275
|
end
|
230
276
|
persist :related_cancers
|
231
277
|
|
@@ -234,7 +280,7 @@ module Gene
|
|
234
280
|
raise "No organism defined" if self.organism.nil?
|
235
281
|
clean_organism = self.organism.sub(/\/.*/,'') + '/jun2011'
|
236
282
|
names.organism = clean_organism
|
237
|
-
ranges = names.chromosome.zip(name.
|
283
|
+
ranges = names.chromosome.zip(name.chr_range).collect do |chromosome, range|
|
238
284
|
next if range.nil?
|
239
285
|
[chromosome, range.begin, range.end] * ":"
|
240
286
|
end
|
@@ -243,62 +289,89 @@ module Gene
|
|
243
289
|
persist :somatic_snvs
|
244
290
|
|
245
291
|
|
292
|
+
# Average, over the gene's articles, of the number of words in each article's
# text that appear among the stemmed query terms. Evaluates to 0 when the gene
# has no articles.
property :literature_score do |terms|
  stems = terms.collect { |term| term.stem }
  gene_articles = self.articles

  if gene_articles.nil? || gene_articles.empty?
    0
  else
    matches = gene_articles.inject(0) do |sum, article|
      sum + article.text.words.select { |word| stems.include? word }.length
    end
    matches.to_f / gene_articles.length
  end
end
persist :literature_score
|
302
|
+
|
303
|
+
|
246
304
|
property :ihop_interactions => :single do
|
247
305
|
uniprot = self.uniprot
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
end
|
306
|
+
if uniprot.nil?
|
307
|
+
nil
|
308
|
+
else
|
309
|
+
sentences = []
|
253
310
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
}.compact.flatten.each do |evidence|
|
260
|
-
symbol = evidence.attr('symbol')
|
261
|
-
taxid = evidence.attr('ncbiTaxId')
|
262
|
-
|
263
|
-
if Organism.entrez_taxids(self.organism).list.include? taxid
|
264
|
-
interactors << symbol
|
265
|
-
end
|
311
|
+
begin
|
312
|
+
url = "http://ws.bioinfo.cnio.es/iHOP/cgi-bin/getSymbolInteractions?ncbiTaxId=9606&reference=#{uniprot}&namespace=UNIPROT__AC"
|
313
|
+
doc = Nokogiri::XML(Open.read(url))
|
314
|
+
sentences = doc.css("iHOPsentence")
|
315
|
+
rescue
|
266
316
|
end
|
267
|
-
end
|
268
|
-
|
269
|
-
Gene.setup(interactors, "Associated Gene Name", self.organism).organism
|
270
|
-
|
271
|
-
interactors_ensembl = interactors.ensembl
|
272
317
|
|
273
|
-
|
274
|
-
interactors.collect{|i| i}.zip(interactors_ensembl.collect{|i| i}).each do |o,e|
|
275
|
-
interactors2ensembl[o] = e
|
318
|
+
sentences
|
276
319
|
end
|
320
|
+
end
|
277
321
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
322
|
+
property :tagged_ihop_interactions => :single do
|
323
|
+
interactors = []
|
324
|
+
ihop_interactions = self.ihop_interactions
|
325
|
+
if ihop_interactions.nil?
|
326
|
+
nil
|
327
|
+
else
|
328
|
+
ihop_interactions.each do |sentence|
|
329
|
+
sentence.css('iHOPatom').collect{|atom|
|
330
|
+
atom.css('evidence');
|
331
|
+
}.compact.flatten.each do |evidence|
|
282
332
|
symbol = evidence.attr('symbol')
|
283
333
|
taxid = evidence.attr('ncbiTaxId')
|
284
334
|
|
285
335
|
if Organism.entrez_taxids(self.organism).list.include? taxid
|
286
|
-
symbol
|
287
|
-
else
|
288
|
-
nil
|
336
|
+
interactors << symbol
|
289
337
|
end
|
290
|
-
end
|
338
|
+
end
|
339
|
+
end
|
291
340
|
|
292
|
-
|
341
|
+
Gene.setup(interactors, "Associated Gene Name", self.organism).organism
|
342
|
+
|
343
|
+
interactors_ensembl = interactors.ensembl
|
293
344
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
345
|
+
interactors2ensembl = {}
|
346
|
+
interactors.collect{|i| i}.zip(interactors_ensembl.collect{|i| i}).each do |o,e|
|
347
|
+
interactors2ensembl[o] = e
|
348
|
+
end
|
349
|
+
|
350
|
+
ihop_interactions.collect do |sentence|
|
351
|
+
sentence.css('iHOPatom').each{|atom|
|
352
|
+
literal = atom.content()
|
353
|
+
evidences = atom.css('evidence')
|
354
|
+
symbol = evidences.collect do |evidence|
|
355
|
+
symbol = evidence.attr('symbol')
|
356
|
+
taxid = evidence.attr('ncbiTaxId')
|
357
|
+
|
358
|
+
if Organism.entrez_taxids(self.organism).list.include? taxid
|
359
|
+
symbol
|
360
|
+
else
|
361
|
+
nil
|
362
|
+
end
|
363
|
+
end.compact.first
|
364
|
+
|
365
|
+
evidences.remove
|
366
|
+
|
367
|
+
if interactors2ensembl.include? symbol and not interactors2ensembl[symbol].nil?
|
368
|
+
atom.children.remove
|
369
|
+
interactor = interactors2ensembl[symbol]
|
370
|
+
atom.replace interactor.respond_to?(:link)? interactor.link(nil, nil, :html_link_extra_attrs => "title='#{literal}'") : interactor.name
|
371
|
+
end
|
372
|
+
}
|
373
|
+
sentence.to_s
|
374
|
+
end
|
301
375
|
end
|
302
376
|
end
|
303
377
|
end
|
304
|
-
|