sequence_logo 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,92 +0,0 @@
1
# A nucleotide word written in the IUPAC ambiguity alphabet.
#
# The class is a String subclass, so every String method is available, but
# several of them are redefined with IUPAC-aware semantics:
# * #== tests letter-by-letter *compatibility* (shared nucleotides), not identity;
# * #include? returns the index of the first compatible window (or false);
# * #reverse and #compl return IUPAC instances.
#
# Relies on the project-wide +checkerr+ helper (defined outside this class) to
# report invalid input.
class IUPAC < String
  # Sorted set of plain nucleotides => IUPAC letter describing that set.
  CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}

  # IUPAC letter => sorted set of plain nucleotides it stands for.
  REVCODE = CODE.invert

  # Compatibility table: IUPACOM[x][y] is truthy when the letters x and y
  # have at least one nucleotide in common.  The four base rows list every
  # letter containing that base; the ambiguous rows are unions of base rows.
  IUPACOM = {
    "A" => {"A" => :llib, "R" => :llib, "M" => :llib, "W" => :llib, "D" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
    "C" => {"C" => :llib, "Y" => :llib, "M" => :llib, "S" => :llib, "B" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
    "G" => {"G" => :llib, "R" => :llib, "K" => :llib, "S" => :llib, "B" => :llib, "D" => :llib, "V" => :llib, "N" => :llib},
    "T" => {"T" => :llib, "Y" => :llib, "K" => :llib, "W" => :llib, "B" => :llib, "D" => :llib, "H" => :llib, "N" => :llib}
  }
  IUPACOM["R"] = IUPACOM["G"].merge(IUPACOM["A"])
  IUPACOM["Y"] = IUPACOM["T"].merge(IUPACOM["C"])
  IUPACOM["K"] = IUPACOM["G"].merge(IUPACOM["T"])
  IUPACOM["M"] = IUPACOM["A"].merge(IUPACOM["C"])
  IUPACOM["S"] = IUPACOM["G"].merge(IUPACOM["C"])
  IUPACOM["W"] = IUPACOM["A"].merge(IUPACOM["T"])
  IUPACOM["B"] = IUPACOM["G"].merge(IUPACOM["T"].merge(IUPACOM["C"]))
  IUPACOM["D"] = IUPACOM["G"].merge(IUPACOM["A"].merge(IUPACOM["T"]))
  IUPACOM["H"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["T"]))
  IUPACOM["V"] = IUPACOM["G"].merge(IUPACOM["C"].merge(IUPACOM["A"]))
  IUPACOM["N"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["G"].merge(IUPACOM["T"])))

  # Returns a copy that is itself an IUPAC instance.
  def dup
    IUPAC.new(self)
  end

  # Builds an IUPAC word.
  #
  # words - one of:
  #         * Array of equally long words: every column is collapsed into the
  #           IUPAC letter for the set of letters seen in that column;
  #         * IUPAC: copied as is;
  #         * String: must consist of the letters ACGTURYKMSWBDHVN only.
  #         Any other argument leaves the new object an empty string
  #         (behavior kept from the original implementation).
  #
  # Invalid input is reported through +checkerr+.
  def initialize(words)
    if words.is_a?(Array)
      # An empty list used to fail with NoMethodError on words[0].
      checkerr("empty words list") { words.empty? }
      letters = (0...words[0].size).collect { |i|
        column = words.collect { |word| word[i, 1] }.uniq.sort.join
        checkerr("bad letter set #{column}") { !CODE.has_key?(column) }
        CODE[column]
      }
      super(letters.join)
    elsif words.is_a?(IUPAC)
      super(words)
    elsif words.is_a?(String)
      checkerr("word #{words} has strange characters") { !words.delete('ACGTURYKMSWBDHVN').empty? }
      super(words)
    end
  end

  # Compatibility test: true when both words have the same length and the
  # letters at every position share at least one nucleotide (see IUPACOM).
  # Anything that is not a String is never equal (previously raised).
  def ==(iupac)
    return false unless iupac.is_a?(String) && size == iupac.size
    (0...size).all? { |i| IUPACOM[self[i, 1]][iupac[i, 1]] }
  end

  # Position-wise union of two words of equal length.
  # Returns a new IUPAC, or nil when the lengths differ.
  def merge(iupac)
    return nil if size != iupac.size
    merged = (0...size).collect { |i|
      bases = (REVCODE[self[i, 1]].split(//) + REVCODE[iupac[i, 1]].split(//)).uniq.sort.join
      CODE[bases]
    }
    IUPAC.new(merged.join)
  end

  # Looks for the first window of self that is compatible (#==) with +iupac+.
  # Returns the window's start index (note: 0 is a valid, truthy hit) or
  # false when there is no hit, +iupac+ is longer than self, or +iupac+ is
  # not an IUPAC instance.
  def include?(iupac)
    # Type check first: the argument may not even respond to #size.
    return false unless iupac.is_a?(IUPAC) && size >= iupac.size
    (0..size - iupac.size).each { |i|
      return i if IUPAC.new(self[i, iupac.size]) == iupac
    }
    false
  end

  # Returns the complement (not reversed) as a new IUPAC.
  # The result is wrapped explicitly because String#tr returns a plain
  # String for subclass receivers on Ruby 3.0 and later.
  def compl
    IUPAC.new(tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN"))
  end

  # Complements the word in place and returns self.
  def compl!
    tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
    self
  end

  # The plain String#reverse stays reachable as #reverse_string.
  alias reverse_string reverse

  # Returns the reversed word as a new IUPAC.
  def reverse
    IUPAC.new(reverse_string)
  end

  alias comp! compl!
  alias complement! compl!
  alias comp compl
  alias complement compl
end
@@ -1,574 +0,0 @@
1
- module Ytilib
2
# Position matrix over the nucleotide alphabet.
#
# @matrix maps a letter ('A', 'C', 'G', 'T' and, after #iupacomp!, the IUPAC
# ambiguity letters) to an Array with one value per motif position.  Depending
# on how the values were produced the object is used as a count matrix (PCM)
# or, after #to_pwm!, as a log-odds weight matrix (PWM).
#
# Depends on names defined outside this class: +checkerr+, +Randoom::DEF_PROBS+,
# +IUPAC+, +PPM+, String#revcomp!, File.ext_wo_name / File.name_wo_ext and the
# information-content helpers (+infocod+, +icd2of4+, +icdThc+, +icdTlc+).
class PM
  # Plain nucleotide letters, in the order used for every row/column listing.
  NUCLEOTIDES = ['A', 'C', 'G', 'T']

  # Letters that trade places under complementation.  S, W and N are their
  # own complements and therefore absent.
  COMPLEMENT_PAIRS = [['A', 'T'], ['C', 'G'], ['R', 'Y'], ['K', 'M'], ['B', 'V'], ['D', 'H']]

  # [IUPAC letter, [nucleotides it stands for]] for every ambiguous letter.
  IUPAC_LS = (IUPAC::CODE.keys - ['A','C','G','T']).collect { |iul| [IUPAC::CODE[iul], iul.split(//)] }

  attr_reader :matrix, :size
  attr_accessor :words_count

  alias length size

  # size        - number of motif positions.
  # matrix      - Hash letter => Array of +size+ values; when nil an empty
  #               (nil-filled) A/C/G/T matrix is allocated.
  # words_count - number of aligned words behind the matrix.  When missing or
  #               non-positive it is taken from the rounded sum of the first
  #               column, and left nil if that sum is below 2 or cannot be
  #               computed (no matrix given, or zero positions).
  def initialize(size, matrix = nil, words_count = nil)
    unless matrix.nil?
      # The message dereferences matrix, so it must only be built when a matrix exists.
      checkerr("matrix['A'].size != size, #{matrix['A'].size} != #{size}") { size != matrix['A'].size }
    end
    @size = size
    @matrix = matrix.nil? ? PM.new_matrix(size) : matrix
    if words_count && words_count > 0
      @words_count = words_count
    elsif matrix.nil? || size == 0
      # Nothing to sum yet: a fresh matrix holds only nils.
      @words_count = nil
    else
      total = col_sum(0).round
      @words_count = total >= 2 ? total : nil
    end
  end

  # Iterates over position indices (0...size).
  def each_position_index(&block)
    @matrix['A'].each_index(&block)
  end

  # Yields [a, c, g, t] for every position; returns an Enumerator without a block.
  def each_position(&block)
    return enum_for(:each_position) unless block_given?
    @matrix['A'].each_index do |i|
      yield NUCLEOTIDES.map { |letter| @matrix[letter][i] }
    end
  end

  # Expected score of a random word drawn with per-letter probabilities +bckgr+.
  def score_mean(bckgr = Randoom::DEF_PROBS)
    (0...@size).inject(0.0) { |mean, i|
      mean + NUCLEOTIDES.inject(0.0) { |sum, l| sum + @matrix[l][i] * bckgr[l] }
    }
  end

  # Variance of the score of a random word (sum of per-position variances).
  def score_variance(bckgr = Randoom::DEF_PROBS)
    (0...@size).inject(0.0) { |m2, i|
      second_moment = NUCLEOTIDES.inject(0.0) { |sum, l| sum + @matrix[l][i]**2 * bckgr[l] }
      first_moment = NUCLEOTIDES.inject(0.0) { |sum, l| sum + @matrix[l][i] * bckgr[l] }
      m2 + (second_moment - first_moment**2)
    }
  end

  # Normal-approximation estimate of P(score >= threshold).
  # +mean+ / +variance+ default to #score_mean / #score_variance.
  def p_value(threshold, mean = nil, variance = nil)
    mean ||= score_mean
    variance ||= score_variance
    z = (threshold - mean) / Math.sqrt(variance)
    # Math.erf2 is a project extension; fall back to the stdlib error
    # function when that extension is not loaded.
    erf = Math.respond_to?(:erf2) ? Math.erf2(z / Math.sqrt(2)) : Math.erf(z / Math.sqrt(2))
    (1 - erf) / 2.0
  end

  # Word made of the top-valued letter of every position; "N" marks ties.
  def best_word
    (0...size).collect { |i|
      top = NUCLEOTIDES.collect { |l| @matrix[l][i] }.max
      winners = NUCLEOTIDES.select { |l| @matrix[l][i] == top }
      winners.size == 1 ? winners.first : "N"
    }.join
  end

  # IUPAC word whose every letter encodes all top-valued letters of a position.
  def strict_consensus
    IUPAC.new((0...size).collect { |i|
      top = NUCLEOTIDES.collect { |l| @matrix[l][i] }.max
      IUPAC::CODE[NUCLEOTIDES.select { |l| @matrix[l][i] == top }.join]
    }.join)
  end

  # Consensus as a plain String.  With +beautiful+ set, letters that stand
  # for more than two nucleotides are written in lower case.
  def consensus_string(beautiful = false)
    consensus_letter_sets.collect { |lets|
      reslet = IUPAC::CODE[lets]
      beautiful && lets.size > 2 ? reslet.downcase : reslet
    }.join
  end

  # Consensus as an IUPAC word (see #consensus_letter_sets for the rule).
  def consensus
    IUPAC.new(consensus_letter_sets.collect { |lets| IUPAC::CODE[lets] }.join)
  end

  # Index of the first window of +s+ scoring at least +score_g+, or nil.
  # The reverse-complement strand is considered when +use2strands+ is set.
  def find_hit(s, score_g, use2strands = true)
    (0..(s.size - @size)).each { |i|
      return i if window_best(s, i, use2strands) >= score_g
    }
    nil
  end

  # Indices of all windows of +s+ scoring at least +score_g+.
  def find_hits(s, score_g, use2strands = true)
    (0..(s.size - @size)).select { |i| window_best(s, i, use2strands) >= score_g }
  end

  # Collects [score, word, reverse_strand?, index] for every window (and,
  # when +use2strands+ is set, for its reverse complement) scoring at least
  # +score_g+.  Words are upcased for scoring only.
  # The original ignored +use2strands+; with the default (true) the result
  # is unchanged.
  def collect_hits(s, score_g, use2strands = true)
    result = []
    (0..(s.size - @size)).each { |i|
      seq = s[i, @size]
      score_p = score(seq.upcase)
      result << [score_p, seq, false, i] if score_p >= score_g
      next unless use2strands
      seq_rc = s[i, @size].revcomp!
      score_rc = score(seq_rc.upcase)
      result << [score_rc, seq_rc, true, i] if score_rc >= score_g
    }
    result
  end

  # Highest window score found in +s+ (both strands when +use2strands+).
  def best_hit(s, use2strands = true)
    checkerr("too short sequence") { s.size < @size }
    (0..(s.size - @size)).inject(-Float::MAX) { |best, i|
      [best, window_best(s, i, use2strands)].max
    }
  end

  # True when the A/C/G/T rows are eql? to those of +pm+.
  def eql?(pm)
    NUCLEOTIDES.all? { |letter| @matrix[letter].eql?(pm.matrix[letter]) }
  end

  # Tolerance-based comparison of the A/C/G/T rows.
  # NOTE(review): the leading checkerr has an always-true condition, so this
  # method presumably always raises; kept exactly as the author left it.
  def flexeql?(pm)
    checkerr("for what?") { true }
    NUCLEOTIDES.all? { |letter|
      (0...@size).all? { |position|
        (@matrix[letter][position] - pm.matrix[letter][position]).abs < 10**-11
      }
    }
  end

  # Sum of the values of the letters +letset+ at position +index+.
  def col_sum(index = 0, letset = ['A','C','G','T'])
    letset.inject(0) { |sum, l| sum + @matrix[l][index] }
  end

  # Sum of the A/C/G/T values of a raw matrix Hash at position +index+.
  def PM.col_sum(matrix, index = 0)
    matrix['A'][index] + matrix['C'][index] + matrix['G'][index] + matrix['T'][index]
  end

  # Converts the counts to log-odds weights in place and returns self:
  #   log((count + p * pseudocount) / ((words_count + pseudocount) * p))
  # words_count - overrides the stored words count when positive.
  # probs       - per-letter background probabilities; must cover every
  #               letter present in the matrix.
  def to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
    @words_count = words_count if words_count && words_count > 0
    # Same diagnostic as #get_ppm instead of a NoMethodError on nil below.
    checkerr("undefined words count") { !@words_count }

    @matrix.each_key do |letter|
      (0...@size).each { |pos|
        @matrix[letter][pos] = Math::log( (@matrix[letter][pos] + (probs[letter] * pseudocount)) / ( (@words_count + pseudocount) * probs[letter]) )
      }
    end

    self
  end

  # Non-destructive variant of #to_pwm!.
  def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
    dup.to_pwm!(words_count, probs, pseudocount)
  end
  alias to_pwm get_pwm

  # Returns a PPM whose values are this matrix's values divided by the words count.
  def get_ppm(words_count = nil)
    words_count = @words_count unless words_count
    checkerr("undefined words count") { !words_count || words_count <= 0 }
    ppm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)
    @matrix.each_key { |letter|
      (0...@size).each { |i| ppm[letter][i] = @matrix[letter][i].to_f / words_count }
    }
    PPM.new(@size, ppm, words_count)
  end
  alias to_ppm get_ppm

  # Sum of the matrix values selected by the letters of +word+.
  # IUPAC letters are accepted only when the matrix has an 'N' row.
  def score(word)
    checkerr("word size != pwm.size") { @size != word.size }
    checkerr("word #{word} has strange characters") {
      @matrix.key?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
    }
    (0...@size).inject(0) { |sum, i| sum + @matrix[word[i,1]][i] }
  end

  # Largest achievable score: sum of per-position maxima over A/C/G/T.
  def best_score
    (0...size).inject(0) { |sum, i| sum + NUCLEOTIDES.collect { |l| @matrix[l][i] }.max }
  end

  # Smallest achievable score: sum of per-position minima over A/C/G/T.
  def worst_score
    (0...size).inject(0) { |sum, i| sum + NUCLEOTIDES.collect { |l| @matrix[l][i] }.min }
  end

  # Copy with duplicated rows.  Always a PM, also for subclass receivers.
  def dup
    new_matrix = {}
    @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
    PM.new(@size, new_matrix, @words_count)
  end

  # Builds a count matrix from aligned words (length taken from the first
  # word).  An 'N' adds 0.25 to each nucleotide.  With +iupacomp+ the result
  # is extended by #iupacomp!.
  def PM.new_pcm(words, iupacomp = false)
    size = words[0].size
    counts = PM.new_matrix(size)
    counts.each_value { |arr| arr.fill(0) }
    words.each { |word|
      0.upto(size-1) { |i|
        letter = word[i,1].upcase
        checkerr("unknown letter #{letter}") { !['A', 'C', 'G', 'T', 'N'].include?(letter) }
        if letter != 'N'
          counts[letter][i] += 1
        else
          NUCLEOTIDES.each { |l| counts[l][i] += 0.25 }
        end
      }
    }
    newpcm = PM.new(size, counts, words.size)
    newpcm.iupacomp! if iupacomp
    newpcm
  end

  # Builds a weight matrix from aligned words with the default #to_pwm! settings.
  def PM.new_pwm(words)
    pcm = PM.new_pcm(words)
    pcm.to_pwm!
    pcm
  end

  # Loads a matrix from a text file in letter-column ("pat") or letter-row
  # ("pwm") layout.  Lines that are blank or contain a non-numeric field are
  # skipped; exactly four numeric lines are taken as letter rows and
  # transposed.  Returns a PPM when every column rounds to a sum of 1,
  # otherwise a PM.
  def PM.load(filename)
    tm = []
    # File.read rather than IO.read: the latter treats a leading "|" in the
    # name as a command to run.
    File.read(filename).each_line { |line|
      fields = line.split
      # A blank line used to be appended as an empty row.
      next if fields.empty?
      begin
        tm << fields.collect { |field| Float(field) }
      rescue ArgumentError, TypeError
        next
      end
    }
    tm = tm.transpose if tm.size == 4
    matrix = PM.new_matrix(tm.size)
    tm.each_index { |i| NUCLEOTIDES.each_with_index { |l, j| matrix[l][i] = tm[i][j] } }

    ppm_mode = (0...tm.size).all? { |i| col_sum(matrix, i).round == 1 }

    ppm_mode ? PPM.new(tm.size, matrix) : PM.new(tm.size, matrix)
  end

  # Writes the A/C/G/T rows to +filename+; the layout follows the extension:
  # "pwm" - one line per letter; "pat" - a title line, then one line per position.
  # Other extensions are reported through checkerr *before* the file is
  # opened, so a rejected call no longer leaves a truncated file behind.
  def save(filename)
    format = File.ext_wo_name(filename)
    unless format == "pwm" || format == "pat"
      # checkerr is called without a block, as in the original; presumably it
      # then fails unconditionally.  The explicit return covers the other case.
      if format == "xml"
        checkerr("small-BiSMark is not supported at this moment")
      else
        checkerr("unknown motif file format specified")
      end
      return nil
    end

    File.open(filename, "w") { |out_f|
      if format == "pwm"
        NUCLEOTIDES.each { |letter|
          @matrix[letter].each { |e| out_f << "#{e} " }
          out_f << $/
        }
      else
        out_f.puts File.name_wo_ext(filename)
        (0...@size).each { |i|
          NUCLEOTIDES.each { |letter| out_f << "#{@matrix[letter][i]} " }
          out_f << $/
        }
      end
    }
  end

  # Adds the absolute value of the smallest matrix entry to every entry, in
  # place; returns self.
  def positiv!
    offset = @matrix.values.collect { |v| v.min }.min.abs
    @matrix.each_value { |v| v.collect! { |e| e + offset } }
    self
  end

  # Turns the matrix into its reverse complement in place; returns self.
  # Complementary rows are exchanged (including the IUPAC ambiguity rows
  # when present, which the original left in place) and every row is reversed.
  def revcomp!
    COMPLEMENT_PAIRS.each { |x, y|
      next unless @matrix.key?(x) && @matrix.key?(y)
      @matrix[x], @matrix[y] = @matrix[y], @matrix[x]
    }
    @matrix.each_value { |v| v.reverse! }
    self
  end

  # Appends this matrix to the XML element +b+ as a "PWM" element (when the
  # first 'A' value is a Float) or a "PCM" element, one "pm-column" child per
  # position.
  def to_bismark(b)
    pwm = @matrix['A'][0].is_a?(Float)
    attributes = {"length" => @size}
    attributes["words-count"] = @words_count if @words_count && @words_count > 0
    pe = b.add_element( pwm ? "PWM" : "PCM", attributes )
    each_position_index do |i|
      pm_c = pe.add_element("pm-column", {"position" => i+1})
      NUCLEOTIDES.each { |l|
        pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
      }
    end
  end

  # Builds a matrix from an XML element named PPM, PCM, PWM or WPCM.
  # Values are read as floats for PPM/PWM/WPCM and as integers otherwise.
  # +iupacomp+ extends PPM/PCM results via #iupacomp!; it cannot be applied
  # to a PWM and raises in that case.
  def PM.from_bismark(b, iupacomp = false)
    checkerr("empty small-BiSMark file?") { !b }
    float_m = (b.name == "PPM" || b.name == "PWM" || b.name == "WPCM")
    words_count = b.attributes["words-count"] ? b.attributes["words-count"].to_f : nil

    matrix = {"A" => [], "C" => [], "G" => [], "T" => []}
    b.elements.each("pm-column") { |pmc|
      position = pmc.attributes["position"].to_i
      NUCLEOTIDES.each { |l|
        text = pmc.elements[l.downcase].get_text.to_s
        matrix[l][position-1] = float_m ? text.to_f : text.to_i
      }
    }

    if b.name == "PPM"
      newppm = PPM.new(matrix['A'].size, matrix, words_count)
      newppm.iupacomp! if iupacomp
      return newppm
    end
    if b.name == "PCM"
      # The original also assigned col_sum(matrix) to a class-level
      # @words_count that nothing read; PM#initialize already derives the
      # words count from the first column when the attribute is absent.
      newpcm = PM.new(matrix['A'].size, matrix, words_count)
      newpcm.iupacomp! if iupacomp
      return newpcm
    end
    if b.name == "PWM" && iupacomp
      raise "cannot force IUPAC compatible PWM"
    end
    PM.new(matrix['A'].size, matrix, words_count)
  end

  # Adds a row for every ambiguous IUPAC letter holding the mean of the rows
  # of the nucleotides it stands for; returns self.  A missing words count is
  # set to the largest column sum.
  def iupacomp!
    @words_count = (0...@size).collect { |i| col_sum(i) }.max unless @words_count # for unbalanced matrices (Genomatix has some)

    IUPAC_LS.each { |letter, bases|
      # to_f: integer count matrices must not be averaged with integer division.
      @matrix[letter] = (0...@size).collect { |i| col_sum(i, bases).to_f / bases.size }
    }

    self
  end

  # Mean random score plus three standard deviations.
  def m3sd(bckgr = Randoom::DEF_PROBS)
    score_mean(bckgr) + 3 * Math.sqrt(score_variance(bckgr))
  end

  # Resets an already defined words count to the largest column sum.
  def fixwc
    return unless @words_count
    @words_count = (0...@size).collect { |i| col_sum(i) }.max
  end

  # Matrix factories.  They are singleton methods and callable from
  # anywhere: the `protected` keyword that preceded them in the original had
  # no effect on them.

  # Returns a nil-filled A/C/G/T matrix with +size+ positions.
  def PM.new_matrix(size)
    {
      'A' => Array.new(size),
      'C' => Array.new(size),
      'G' => Array.new(size),
      'T' => Array.new(size) }
  end

  # Returns a nil-filled matrix with a row for each of the 15 IUPAC letters.
  def PM.new_matrix_iupac(size)
    matrix = {}
    ['A', 'C', 'G', 'T', 'R', 'Y', 'K', 'M', 'S', 'W', 'B', 'D', 'H', 'V', 'N'].each { |letter|
      matrix[letter] = Array.new(size)
    }
    matrix
  end

  private

  # For every position returns the String of nucleotides kept in the
  # consensus: all letters whose value is among the 1, 2 or 3 highest distinct
  # values when the position's information content exceeds icd2of4, icdThc or
  # icdTlc respectively, and every letter otherwise.
  def consensus_letter_sets
    checkerr("words count is undefined") { !@words_count }
    i2o4, thc, tlc = icd2of4, icdThc, icdTlc
    icd = infocod

    (0...size).collect { |i|
      scores = NUCLEOTIDES.collect { |l| @matrix[l][i] }.uniq.sort.reverse

      if icd[i] > i2o4
        scores = [scores.first]
      elsif icd[i] > thc
        scores = scores[0..1]
      elsif icd[i] > tlc
        scores = scores[0..2]
      end

      NUCLEOTIDES.select { |l| scores.include?(@matrix[l][i]) }.join
    }
  end

  # Score of the window of +s+ starting at +i+: the forward-strand score, or
  # the better of both strands when +use2strands+ is set.  The reverse strand
  # is only scored when it is actually needed.
  def window_best(s, i, use2strands)
    forward = score(s[i, @size])
    return forward unless use2strands
    [forward, score(s[i, @size].revcomp!)].max
  end
end
434
-
435
# Positional probability matrix: a PM whose values are per-position letter
# probabilities, so a word's score is the *product* of the selected values.
#
# Depends on names defined outside this class: +PM+ (superclass, including its
# IUPAC_LS constant and matrix factories), +IUPAC+, +Randoom::DEF_PROBS+ and
# +checkerr+.
class PPM < PM

  # (A former make_N_comp! helper is deprecated; use #iupacomp! instead.)

  # size        - number of motif positions.
  # matrix      - Hash letter => Array of +size+ probabilities; when nil an
  #               empty (nil-filled) A/C/G/T matrix is allocated.
  # words_count - stored as given (no derivation from the matrix, unlike PM).
  def initialize(size, matrix = nil, words_count = nil)
    checkerr("matrix['A'].size != size") { matrix != nil && size != matrix['A'].size }
    @size = size
    @matrix = matrix == nil ? PM.new_matrix(size) : matrix
    @words_count = words_count
  end

  # Adds a row for every ambiguous IUPAC letter holding the mean of the rows
  # of the nucleotides it stands for; returns self.  A missing words count
  # defaults to 4.0.
  def iupacomp!
    @words_count = 4.0 unless @words_count

    IUPAC_LS.each { |letter, bases|
      # to_f guards against integer division for integer-valued matrices.
      @matrix[letter] = (0...@size).collect { |i| col_sum(i, bases).to_f / bases.size }
    }

    self
  end

  # Product of the matrix values selected by the letters of +word+.
  # IUPAC letters are accepted only when the matrix has an 'N' row.
  def score(word)
    checkerr("word size != ppm.size") { @size != word.size }
    checkerr("word #{word} has strange characters") {
      @matrix.key?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
    }
    (0...@size).inject(1) { |mul, i| mul * @matrix[word[i,1]][i] }
  end

  # Largest achievable score: product of per-position maxima over A/C/G/T.
  def best_score
    (0...size).inject(1) { |mul, i| mul * ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max }
  end

  # Smallest achievable score: product of per-position minima over A/C/G/T.
  # The fold starts at 1; the original started at 0, which made the product
  # 0 regardless of the matrix.
  def worst_score
    (0...size).inject(1) { |mul, i| mul * ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.min }
  end

  # Appends this matrix to the XML element +b+ as a "PPM" element with one
  # "pm-column" child per position.
  def to_bismark(b)
    attributes = {"length" => @size}
    attributes["words-count"] = @words_count if @words_count
    pe = b.add_element("PPM", attributes)
    (0...@matrix['A'].size).each { |i|
      pm_c = pe.add_element("pm-column", {"position" => i+1})
      ['A', 'C', 'G', 'T'].each { |l|
        pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
      }
    }
  end

  # Extends the Hash +probs+ in place with an entry for every ambiguous IUPAC
  # letter (mean of the probabilities of its nucleotides); returns +probs+.
  def PPM.probs2IUPAC!(probs)
    IUPAC_LS.each { |letter, bases|
      probs[letter] = bases.inject(0) { |sum, l| sum + probs[l] } / bases.size
    }
    probs
  end

  # Returns a new PM holding log-odds weights:
  #   log((p_matrix * words_count + p_bg * pseudocount) / ((words_count + pseudocount) * p_bg))
  # words_count - falls back to the stored words count when nil or 0.
  # probs       - background probabilities for A/C/G/T; a copy is extended to
  #               the IUPAC letters, the argument itself is not modified.
  def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1.0)
    probs = PPM.probs2IUPAC!(probs.dup)

    words_count = @words_count if !words_count || words_count == 0
    checkerr("undefined words count") { !words_count }

    pwm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)

    @matrix.each_key do |letter|
      (0...@size).each { |pos|
        pwm[letter][pos] = Math::log( (@matrix[letter][pos] * words_count + (probs[letter] * pseudocount) ) / ( (words_count + pseudocount) * probs[letter]) )
      }
    end
    PM.new(@size, pwm, words_count)
  end
  alias to_pwm get_pwm

  # Returns a new PM holding log(p_matrix / p_bg) without any pseudocount.
  def get_pwm0pc(probs = Randoom::DEF_PROBS)
    new_matrix = {}
    @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
    # The PM is created from the probability copy first (as in the original)
    # and shares new_matrix, which is then overwritten with the weights.
    newpm = PM.new(@size, new_matrix, nil)

    new_matrix.each_key do |letter|
      (0...@size).each { |pos|
        new_matrix[letter][pos] = Math::log(@matrix[letter][pos] / probs[letter])
      }
    end

    newpm
  end

  # In-place conversion is not available for a PPM; always raises.
  # Accepts (and ignores) the arguments of PM#to_pwm! so that such a call
  # reports this message rather than an ArgumentError.
  def to_pwm!(*_args)
    raise "cannot force PPM class to PWM, use to_pwm instead"
  end

  # Returns an IUPAC-extended PM of counts: probabilities times the words count.
  def get_pcm(words_count = nil)
    words_count = @words_count unless words_count
    checkerr("undefined words count") { !words_count }
    counts = PM.new_matrix(@size)
    (0...size).each { |i|
      ['A', 'C', 'G', 'T'].each { |l|
        counts[l][i] = @matrix[l][i] * words_count
      }
    }
    PM.new(size, counts, words_count).iupacomp!
  end
  alias to_pcm get_pcm

  # Builds an IUPAC-extended PPM (words count 4.0) from an IUPAC word: at
  # each position the nucleotides covered by the letter share the probability
  # equally and all other nucleotides get 0.0.
  def PPM.from_IUPAC(iupac)
    matrix = {"A" => [], "C" => [], "G" => [], "T" => []}

    (0...iupac.size).each { |i|
      matrix.each_value { |row| row << 0.0 }
      # [i, 1] always yields a one-letter String, matching the indexing
      # style used by the rest of the file.
      letters = IUPAC::REVCODE[iupac[i, 1]]
      letters.split(//).each { |l| matrix[l][-1] = 1.0/letters.size }
    }

    newppm = PPM.new(iupac.size, matrix, 4.0)
    newppm.iupacomp!

    newppm
  end

end
574
- end