sequence_logo 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,92 +0,0 @@
# Nucleotide word written in the IUPAC ambiguity alphabet.
#
# Instances are Strings, but #== and #include? compare ambiguity codes by
# compatibility (shared nucleotides) instead of literal character equality.
class IUPAC < String
  # Sorted set of plain nucleotides => one-letter IUPAC code.
  CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}

  # One-letter IUPAC code => sorted set of plain nucleotides.
  REVCODE = CODE.invert

  # Compatibility table: IUPACOM[x][y] is :llib when the codes x and y share
  # at least one plain nucleotide; the key y is absent otherwise.
  # The constant is public (a bare `private` keyword does not hide constants).
  IUPACOM = REVCODE.inject({}) do |table, (code, letters)|
    table[code] = REVCODE.inject({}) do |row, (other, other_letters)|
      row[other] = :llib unless (letters.split(//) & other_letters.split(//)).empty?
      row
    end
    table
  end

  # Returns a new IUPAC built from this word.
  def dup
    IUPAC.new(self)
  end

  # Builds a word from one of three inputs:
  # * Array of words - every column is collapsed into the IUPAC code of the
  #   set of letters seen at that position (checkerr is invoked for a set
  #   that has no code);
  # * IUPAC - copied as is;
  # * String - copied after checkerr validates the alphabet.
  # Any other argument leaves the word empty.
  def initialize(words)
    case words
    when Array
      letter_sets = (0...words[0].size).map { |i| words.map { |word| word[i, 1] }.uniq.sort.join }
      iupac = letter_sets.inject("") do |iup, cola|
        checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
        iup << CODE[cola]
      end
      super(iupac)
    when IUPAC
      super(words)
    when String
      # String#tr with an empty replacement deletes the listed characters,
      # so anything left over is outside the accepted alphabet.
      checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
      super(words)
    end
  end

  # Compatibility comparison: true when both words have the same length and
  # the codes at every position share at least one nucleotide.
  #
  # Returns false (instead of raising) for nil and other non-string
  # arguments. Letters missing from IUPACOM (such as 'U', which the
  # constructor accepts) only match themselves.
  def ==(iupac)
    return false unless iupac.is_a?(String) && size == iupac.size

    (0...size).all? do |i|
      mine, theirs = self[i, 1], iupac[i, 1]
      row = IUPACOM[mine]
      # eql? (not ==) for the fallback: on Rubies where String#[] returns an
      # IUPAC, == would re-enter this method.
      row ? row[theirs] : mine.eql?(theirs)
    end
  end

  # Position-wise union of two words of equal length.
  # Returns nil when the lengths differ.
  def merge(iupac)
    return nil if size != iupac.size

    merged = (0...size).inject("") do |res, i|
      letters = (REVCODE[self[i, 1]] + REVCODE[iupac[i, 1]]).split(//).uniq.sort.join
      res << CODE[letters]
    end
    IUPAC.new(merged)
  end

  # Searches for a window of this word that is compatible (see #==) with
  # +iupac+. Returns the index of the first such window, or false when there
  # is none or when +iupac+ is not an IUPAC. Note that index 0 is truthy.
  def include?(iupac)
    # Type check first, so a nil argument yields false rather than raising.
    return false if !iupac.is_a?(IUPAC) || size < iupac.size

    (0..size - iupac.size).find { |i| IUPAC.new(self[i, iupac.size]) == iupac } || false
  end

  # Returns the complemented word (not reversed).
  def compl
    tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
  end

  # Complements the word in place and returns self.
  def compl!
    # tr! returns nil when nothing changed, hence the explicit self.
    tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
    self
  end

  alias reverse_string reverse

  # Reversed word, wrapped into an IUPAC again.
  def reverse
    IUPAC.new(reverse_string)
  end

  alias comp! compl!
  alias complement! compl!
  alias comp compl
  alias complement compl

  # IUPACMERGE = CODE.merge({
  #  "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
  #
  # })
end
@@ -1,574 +0,0 @@
1
- module Ytilib
# Position matrix of a motif. +matrix+ maps a letter to an array holding one
# value per motif position; rows for 'A', 'C', 'G' and 'T' are always used,
# and #iupacomp! adds rows for the IUPAC ambiguity codes.
#
# Relies on helpers that are not defined in this class body (checkerr,
# Randoom::DEF_PROBS, Math.erf2, String#revcomp!, File.ext_wo_name,
# File.name_wo_ext, and the icd2of4 / icdThc / icdTlc / infocod methods).
class PM
  # Plain nucleotides, in the column order used by the file formats below.
  NUCLEOTIDES = ['A', 'C', 'G', 'T'].freeze

  # Letter pairs exchanged by complementation (same mapping as IUPAC#compl);
  # S, W and N are their own complements.
  COMPLEMENT_PAIRS = [['A', 'T'], ['C', 'G'], ['R', 'Y'], ['K', 'M'], ['B', 'V'], ['D', 'H']].freeze

  # [ambiguity code, [plain letters it stands for]] for every non-trivial code.
  IUPAC_LS = (IUPAC::CODE.keys - ['A','C','G','T']).collect { |iul| [IUPAC::CODE[iul], iul.split(//)] }

  attr_reader :matrix, :size
  attr_accessor :words_count

  alias length size

  # size        - number of motif positions.
  # matrix      - letter => array hash; a fresh nil-filled ACGT matrix is
  #               created when omitted.
  # words_count - number of aligned words; when missing or not positive it is
  #               taken from the rounded sum of the first column, provided
  #               that sum is at least 2 (nil otherwise).
  def initialize(size, matrix = nil, words_count = nil)
    # The message interpolates matrix['A'], so it may only be built when a
    # matrix was actually given.
    if matrix
      checkerr("matrix['A'].size != size, #{matrix['A'].size} != #{size}") { size != matrix['A'].size }
    end
    @size = size
    @matrix = matrix || PM.new_matrix(size)
    if words_count && words_count > 0
      @words_count = words_count
    else
      total = first_column_total
      @words_count = total && total.round >= 2 ? total.round : nil
    end
  end

  # Yields every position index (0...size).
  def each_position_index(&block)
    @matrix['A'].each_index(&block)
  end

  # Yields the [A, C, G, T] values of every position; returns an enumerator
  # when called without a block.
  def each_position(&block)
    return enum_for(:each_position) unless block_given?
    @matrix['A'].each_index do |i|
      yield nucleotide_column(i)
    end
  end

  # Sum over positions of the background-weighted mean of the column values.
  def score_mean(bckgr = Randoom::DEF_PROBS)
    (0...@size).inject(0.0) { |mean, i| mean + background_mean(i, bckgr) }
  end

  # Sum over positions of the background-weighted variance of the column values.
  def score_variance(bckgr = Randoom::DEF_PROBS)
    (0...@size).inject(0.0) do |m2, i|
      m2 + (background_mean_square(i, bckgr) - background_mean(i, bckgr)**2)
    end
  end

  # Upper-tail probability of +threshold+ under a normal distribution with
  # the given (or computed) mean and variance.
  def p_value(threshold, mean = nil, variance = nil)
    mean ||= score_mean
    variance ||= score_variance
    n_ = (threshold - mean) / Math.sqrt(variance)
    (1 - Math.erf2(n_ / Math.sqrt(2))) / 2.0
  end

  # Word made of the top-valued letter of every position; 'N' where the
  # maximum is shared by several letters.
  def best_word
    (0...size).inject("") do |word, i|
      max = nucleotide_column(i).max
      maxlets = NUCLEOTIDES.select { |l| @matrix[l][i] == max }
      word << (maxlets.size == 1 ? maxlets.first : "N")
    end
  end

  # IUPAC word whose every code covers all letters tied for the column maximum.
  def strict_consensus
    codes = (0...size).map do |i|
      max = nucleotide_column(i).max
      IUPAC::CODE[NUCLEOTIDES.select { |l| @matrix[l][i] == max }.join]
    end
    IUPAC.new(codes.join)
  end

  # Consensus as a plain String. With +beautiful+ set, codes standing for
  # more than two letters are written in lower case.
  def consensus_string(beautiful = false)
    consensus_letter_sets.map do |lets|
      reslet = IUPAC::CODE[lets]
      beautiful && lets.size > 2 ? reslet.downcase : reslet
    end.join
  end

  # Consensus as an IUPAC word.
  def consensus
    IUPAC.new(consensus_letter_sets.map { |lets| IUPAC::CODE[lets] }.join)
  end

  # Index of the first window of +s+ scoring at least +score_g+, or nil.
  # The reverse-complement strand is considered when +use2strands+ is true.
  def find_hit(s, score_g, use2strands = true)
    (0..(s.size - @size)).find { |i| strand_score(s, i, use2strands) >= score_g }
  end

  # Indices of all windows of +s+ scoring at least +score_g+.
  def find_hits(s, score_g, use2strands = true)
    (0..(s.size - @size)).select { |i| strand_score(s, i, use2strands) >= score_g }
  end

  # Collects [score, sequence, reverse_strand?, index] for every window and
  # strand scoring at least +score_g+. Windows are upcased for scoring only.
  #
  # NOTE(review): +use2strands+ is not consulted here - reverse-strand hits
  # are always reported. Left as is because callers may rely on it; confirm.
  def collect_hits(s, score_g, use2strands = true)
    result = []
    (0..(s.size - @size)).each do |i|
      seq, seq_rc = s[i, @size], s[i, @size].revcomp!
      score_p, score_rc = score(seq.upcase), score(seq_rc.upcase)
      result << [score_p, seq, false, i] if score_p >= score_g
      result << [score_rc, seq_rc, true, i] if score_rc >= score_g
    end
    result
  end

  # Highest window score found in +s+ (checkerr fires when +s+ is shorter
  # than the motif).
  def best_hit(s, use2strands = true)
    checkerr("too short sequence") { s.size < @size }
    (0..(s.size - @size)).inject(-Float::MAX) do |r, i|
      [r, strand_score(s, i, use2strands)].max
    end
  end

  # True when the A, C, G and T rows are eql? to those of +pm+.
  def eql?(pm)
    NUCLEOTIDES.all? { |letter| @matrix[letter].eql?(pm.matrix[letter]) }
  end

  # Tolerant comparison of the ACGT rows (absolute difference below 10**-11).
  # The leading checkerr always fires, i.e. the method is deliberately
  # disabled in the original code.
  def flexeql?(pm)
    checkerr("for what?") { true }
    NUCLEOTIDES.all? do |letter|
      (0...@size).all? do |position|
        (@matrix[letter][position] - pm.matrix[letter][position]).abs < 10**-11
      end
    end
  end

  # Sum of the values of column +index+ over the letters in +letset+.
  def col_sum(index = 0, letset = NUCLEOTIDES)
    letset.inject(0) { |sum, l| sum + @matrix[l][index] }
  end

  # Sum of the ACGT values of column +index+ of a raw matrix hash.
  def self.col_sum(matrix, index = 0)
    matrix['A'][index] + matrix['C'][index] + matrix['G'][index] + matrix['T'][index]
  end

  # Converts the counts in place to log-odds weights:
  #   log((count + prob * pseudocount) / ((words_count + pseudocount) * prob))
  # A positive +words_count+ argument replaces the stored one. Returns self.
  def to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
    @words_count = words_count if words_count && words_count > 0

    @matrix.each_key do |letter|
      (0...@size).each do |pos|
        @matrix[letter][pos] = Math::log( (@matrix[letter][pos] + (probs[letter] * pseudocount)) / ( (@words_count + pseudocount) * probs[letter]) )
      end
    end

    self
  end

  # Non-destructive variant of #to_pwm!.
  def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
    dup.to_pwm!(words_count, probs, pseudocount)
  end
  alias to_pwm get_pwm

  # Returns a PPM whose values are this matrix divided by the words count.
  def get_ppm(words_count = nil)
    words_count = @words_count unless words_count
    checkerr("undefined words count") { !words_count || words_count <= 0 }
    ppm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)
    @matrix.each_key do |letter|
      (0...@size).each { |i| ppm[letter][i] = @matrix[letter][i].to_f / words_count }
    end
    PPM.new(@size, ppm, words_count)
  end
  alias to_ppm get_ppm

  # Additive score of +word+: the sum of the matrix values of its letters.
  # IUPAC letters are accepted only when the matrix has an 'N' row.
  def score(word)
    checkerr("word size != pwm.size") { @size != word.size }
    checkerr("word #{word} has strange characters") {
      @matrix.key?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
    }
    (0...@size).inject(0) { |sum, i| sum + @matrix[word[i,1]][i] }
  end

  # Sum of the per-position maxima over A, C, G, T.
  def best_score
    (0...size).inject(0) { |sum, i| sum + nucleotide_column(i).max }
  end

  # Sum of the per-position minima over A, C, G, T.
  def worst_score
    (0...size).inject(0) { |sum, i| sum + nucleotide_column(i).min }
  end

  # Copy with duplicated rows.
  # NOTE(review): always builds a plain PM, also when called on a subclass.
  def dup
    new_matrix = {}
    @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
    PM.new(@size, new_matrix, @words_count)
  end

  # Builds a count matrix from equally long words over A, C, G, T, N
  # (case-insensitive). An 'N' adds 0.25 to each of the four letters.
  # With +iupacomp+ the result is extended by #iupacomp!.
  def self.new_pcm(words, iupacomp = false)
    size = words[0].size
    counts = PM.new_matrix(size)
    counts.each_value { |arr| arr.fill(0) }
    words.each do |word|
      0.upto(size-1) do |i|
        letter = word[i,1].upcase
        checkerr("unknown letter #{letter}") { !['A', 'C', 'G', 'T', 'N'].include?(letter) }
        if letter != 'N'
          counts[letter][i] += 1
        else
          NUCLEOTIDES.each { |l| counts[l][i] += 0.25 }
        end
      end
    end
    newpcm = PM.new(size, counts, words.size)
    newpcm.iupacomp! if iupacomp
    newpcm
  end

  # Count matrix of +words+ converted with #to_pwm! (default arguments).
  def self.new_pwm(words)
    pcm = PM.new_pcm(words)
    pcm.to_pwm!
    pcm
  end

  # Reads a matrix from a text file in letter-column or letter-row layout
  # (pat & pwm formats). Lines that are blank or contain a non-numeric token
  # are skipped. Exactly four numeric lines are read as letter rows and
  # transposed. Returns a PPM when every column sum rounds to 1, a PM otherwise.
  def self.load(filename)
    tm = []
    IO.read(filename).each_line do |line|
      begin
        row = line.split.collect { |a_i| Float(a_i) }
      rescue ArgumentError
        next # header or other non-numeric line
      end
      # A blank line parses to an empty row and would add a nil position.
      tm << row unless row.empty?
    end
    tm = tm.transpose if tm.size == 4
    matrix = PM.new_matrix(tm.size)
    tm.each_index { |i| NUCLEOTIDES.each_with_index { |l, j| matrix[l][i] = tm[i][j] } }

    ppm_mode = (0...tm.size).all? { |i| col_sum(matrix, i).round == 1 }

    ppm_mode ? PPM.new(tm.size, matrix) : PM.new(tm.size, matrix)
  end

  # Writes the ACGT rows to +filename+; the layout is chosen by the value
  # File.ext_wo_name returns for it: "pwm" writes one line per letter, "pat"
  # writes a name line followed by one line per position. Any other value
  # ends in checkerr.
  def save(filename)
    File.open(filename, "w") { |out_f|
      case File.ext_wo_name(filename)
      when "pwm"
        NUCLEOTIDES.each { |letter|
          @matrix[letter].each { |e| out_f << "#{e} " }
          out_f << $/
        }
      when "pat"
        out_f.puts File.name_wo_ext(filename)
        (0...@size).each { |i|
          NUCLEOTIDES.each { |letter| out_f << "#{@matrix[letter][i]} " }
          out_f << $/
        }
      when "xml"
        checkerr("small-BiSMark is not supported at this moment")
      else
        checkerr("unknown motif file format specified")
      end
    }
  end

  # Adds the absolute value of the smallest matrix entry to every entry.
  # Returns self.
  def positiv!
    min = @matrix.values.collect { |v| v.min }.min.abs
    @matrix.each_value { |v| (0...v.size).each { |i| v[i] += min } }
    self
  end

  # Reverse-complements the matrix in place: complementary rows are
  # exchanged and every row is reversed. IUPAC rows (present after
  # #iupacomp!) are exchanged as well, so they stay consistent with the
  # A, C, G, T rows. Returns self.
  def revcomp!
    COMPLEMENT_PAIRS.each do |one, other|
      next unless @matrix.key?(one) && @matrix.key?(other)
      @matrix[one], @matrix[other] = @matrix[other], @matrix[one]
    end
    @matrix.each_value { |v| v.reverse! }
    self
  end

  # Appends this matrix to the element +b+ as a "PWM" (when the first 'A'
  # value is a Float) or "PCM" element with one "pm-column" child per position.
  def to_bismark(b)
    pwm = @matrix['A'][0].is_a?(Float)
    attributes = {"length" => @size}
    attributes["words-count"] = @words_count if @words_count && @words_count > 0
    pe = b.add_element( pwm ? "PWM" : "PCM", attributes )
    each_position_index do |i|
      pm_c = pe.add_element("pm-column", {"position" => i+1})
      NUCLEOTIDES.each { |l|
        pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
      }
    end
  end

  # Builds a matrix from the element +b+. "PPM" yields a PPM, everything
  # else a PM; values are parsed as floats for PPM/PWM/WPCM and as integers
  # otherwise. +iupacomp+ extends PPM/PCM results and raises for a PWM.
  def self.from_bismark(b, iupacomp = false)
    checkerr("empty small-BiSMark file?") { !b }
    float_m = (b.name == "PPM" || b.name == "PWM" || b.name == "WPCM")
    words_count = b.attributes["words-count"] ? b.attributes["words-count"].to_f : nil

    matrix = {"A" => [], "C" => [], "G" => [], "T" => []}
    b.elements.each("pm-column") { |pmc|
      position = pmc.attributes["position"].to_i
      NUCLEOTIDES.each { |l|
        text = pmc.elements[l.downcase].get_text.to_s
        matrix[l][position-1] = float_m ? text.to_f : text.to_i
      }
    }
    if b.name == "PPM"
      newppm = PPM.new(matrix['A'].size, matrix, words_count)
      newppm.iupacomp! if iupacomp
      return newppm
    end
    if b.name == "PCM"
      # The constructor derives a missing words count from the first column.
      newpcm = PM.new(matrix['A'].size, matrix, words_count)
      newpcm.iupacomp! if iupacomp
      return newpcm
    end
    if b.name == "PWM" && iupacomp
      raise "cannot force IUPAC compatible PWM"
    end
    PM.new(matrix['A'].size, matrix, words_count)
  end

  # Adds a row for every IUPAC ambiguity code, holding the average of the
  # rows of the letters it stands for. A missing words count is set to the
  # largest column sum. Returns self.
  def iupacomp!
    @words_count = (0...@size).collect { |i| col_sum(i) }.max unless @words_count # for unbalanced matrices (Genomatix has some)

    IUPAC_LS.each do |code, letters|
      # to_f keeps integer counts from being truncated by integer division.
      @matrix[code] = (0...@size).collect { |i| col_sum(i, letters).to_f / letters.size }
    end

    self
  end

  # Score mean plus three standard deviations under the background +bckgr+.
  def m3sd(bckgr = Randoom::DEF_PROBS)
    score_mean(bckgr) + 3 * Math.sqrt(score_variance(bckgr))
  end

  # Resets an already defined words count to the largest column sum.
  def fixwc
    return unless @words_count
    @words_count = (0...@size).collect { |i| col_sum(i) }.max
  end

  # Fresh matrix with nil-filled rows for A, C, G, T.
  # (Public, as before: a `protected` keyword does not apply to class methods.)
  def self.new_matrix(size)
    matrix = {}
    NUCLEOTIDES.each { |letter| matrix[letter] = Array.new(size) }
    matrix
  end

  # Fresh matrix with nil-filled rows for all fifteen IUPAC letters.
  def self.new_matrix_iupac(size)
    matrix = {}
    'ACGTRYKMSWBDHVN'.split(//).each { |letter| matrix[letter] = Array.new(size) }
    matrix
  end

  private

  # [A, C, G, T] values of position +i+.
  def nucleotide_column(i)
    NUCLEOTIDES.map { |l| @matrix[l][i] }
  end

  # Background-weighted mean of the values of position +i+.
  def background_mean(i, bckgr)
    NUCLEOTIDES.inject(0.0) { |sum, l| sum + @matrix[l][i] * bckgr[l] }
  end

  # Background-weighted mean of the squared values of position +i+.
  def background_mean_square(i, bckgr)
    NUCLEOTIDES.inject(0.0) { |sum, l| sum + @matrix[l][i]**2 * bckgr[l] }
  end

  # Sum of the first column, or nil when that column is missing or not numeric
  # (e.g. for the nil-filled matrix of a freshly created, empty PM).
  def first_column_total
    values = NUCLEOTIDES.map { |l| @matrix[l] && @matrix[l][0] }
    values.all? { |v| v.is_a?(Numeric) } ? col_sum(0) : nil
  end

  # Score of the window of +s+ starting at +i+; with +use2strands+ the better
  # of the direct and reverse-complement scores.
  def strand_score(s, i, use2strands)
    direct = score(s[i, @size])
    return direct unless use2strands
    [direct, score(s[i, @size].revcomp!)].max
  end

  # For every position, the string of letters that enter the consensus: the
  # letters holding the top one, two or three distinct values, depending on
  # which threshold (icd2of4, icdThc, icdTlc) the position's infocod value
  # exceeds; all letters when it exceeds none.
  def consensus_letter_sets
    checkerr("words count is undefined") { !@words_count }
    i2o4, thc, tlc = icd2of4, icdThc, icdTlc
    icd = infocod

    (0...size).map do |i|
      scores = nucleotide_column(i).uniq.sort.reverse

      if icd[i] > i2o4
        scores = [scores.first]
      elsif icd[i] > thc
        scores = scores[0..1]
      elsif icd[i] > tlc
        scores = scores[0..2]
      end

      NUCLEOTIDES.select { |l| scores.include?(@matrix[l][i]) }.join
    end
  end
end
434
-
# Position probability matrix: a PM whose values are per-position letter
# probabilities and whose word score is a product instead of a sum.
class PPM < PM

  #DEPRECATED, use iupacomp! instead
  #def make_N_comp!
  #  @matrix['N'] = (0...size).collect { 0.25 }
  #  return self
  #end

  # Same arguments as PM#initialize, but +words_count+ is stored as given
  # (it is not derived from the matrix).
  def initialize(size, matrix = nil, words_count = nil)
    checkerr("matrix['A'].size != size") { matrix != nil && size != matrix['A'].size }
    @size = size
    @matrix = matrix == nil ? PM.new_matrix(size) : matrix
    @words_count = words_count
  end

  # Adds a row for every IUPAC ambiguity code, holding the average of the
  # rows of the letters it stands for. A missing words count becomes 4.0.
  # Returns self.
  def iupacomp!
    @words_count = 4.0 unless @words_count

    IUPAC_LS.each do |code, letters|
      @matrix[code] = (0...@size).collect { |i| col_sum(i, letters) / letters.size }
    end

    self
  end

  # Multiplicative score of +word+: the product of the matrix values of its
  # letters. IUPAC letters are accepted only when the matrix has an 'N' row.
  def score(word)
    checkerr("word size != ppm.size") { @size != word.size }
    checkerr("word #{word} has strange characters") {
      @matrix.keys.include?('N') ? word.tr('ACGTRYKMSWBDHVN', '').size > 0 : word.tr('ACGT', '').size > 0
    }
    (0...@size).inject(1) { |mul, i| mul * @matrix[word[i,1]][i] }
  end

  # Product of the per-position maxima over A, C, G, T.
  def best_score
    (0...size).inject(1) { |mul, i| mul * ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.max }
  end

  # Product of the per-position minima over A, C, G, T.
  def worst_score
    # The product must start from 1; a start value of 0 makes every result 0.
    (0...size).inject(1) { |mul, i| mul * ['A', 'C', 'G', 'T'].collect { |l| @matrix[l][i] }.min }
  end

  # Appends this matrix to the element +b+ as a "PPM" element with one
  # "pm-column" child per position.
  def to_bismark(b)
    attributes = {"length" => @size}
    attributes["words-count"] = @words_count if @words_count
    pe = b.add_element("PPM", attributes)
    (0...@matrix['A'].size).each { |i|
      pm_c = pe.add_element("pm-column", {"position" => i+1})
      ['A', 'C', 'G', 'T'].each { |l|
        pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
      }
    }
  end

  # Extends +probs+ in place with an entry for every IUPAC ambiguity code
  # (the average of the probabilities of its letters) and returns it.
  def PPM.probs2IUPAC!(probs)
    IUPAC_LS.each do |code, letters|
      probs[code] = letters.inject(0) { |sum, l| sum + probs[l] } / letters.size
    end
    probs
  end

  # Returns a PM of log-odds weights:
  #   log((p * words_count + prob * pseudocount) / ((words_count + pseudocount) * prob))
  # A nil or zero +words_count+ falls back to the stored one; checkerr fires
  # when neither is available. +probs+ itself is not modified.
  def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1.0)
    probs = PPM.probs2IUPAC!(probs.dup)

    words_count = @words_count if !words_count || words_count == 0
    checkerr("undefined words count") { !words_count }

    pwm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)

    @matrix.each_key do |letter|
      (0...@size).each do |pos|
        pwm[letter][pos] = Math::log( (@matrix[letter][pos] * words_count + (probs[letter] * pseudocount) ) / ( (words_count + pseudocount) * probs[letter]) )
      end
    end
    PM.new(@size, pwm, words_count)
    #pcm = get_pcm(words_count)
    #pcm.iupacomp! if @matrix['N']
    #return pcm.to_pwm!(words_count, probs, pseudocount)
  end
  alias to_pwm get_pwm

  # Returns a PM holding log(p / prob) for every value, i.e. log-odds
  # without pseudocounts.
  def get_pwm0pc(probs = Randoom::DEF_PROBS)
    new_matrix = {}
    @matrix.each_key { |letter| new_matrix[letter] = @matrix[letter].dup }
    # The PM is created before the rows are overwritten, so its constructor
    # sees the probabilities, not the logarithms.
    newpm = PM.new(@size, new_matrix, nil)

    new_matrix.each_key do |letter|
      (0...@size).each do |pos|
        new_matrix[letter][pos] = Math::log(@matrix[letter][pos] / probs[letter])
      end
    end

    newpm
  end

  # In-place conversion is not available for a PPM. Always raises; any
  # arguments accepted by PM#to_pwm! are tolerated so that the caller gets
  # this message rather than an ArgumentError.
  def to_pwm!(*)
    raise "cannot force PPM class to PWM, use to_pwm instead"
  end

  # Returns a PM of counts (probability * words_count) extended by
  # PM#iupacomp!. checkerr fires when no words count is available.
  def get_pcm(words_count = nil)
    words_count = @words_count unless words_count
    checkerr("undefined words count") { !words_count }
    counts = PM.new_matrix(@size)
    (0...size).each do |i|
      ['A', 'C', 'G', 'T'].each { |l| counts[l][i] = @matrix[l][i] * words_count }
    end
    PM.new(size, counts, words_count).iupacomp!
  end
  alias to_pcm get_pcm

  # Builds a PPM (words count 4.0, IUPAC rows included) from an IUPAC word:
  # at every position the letters covered by the code share probability 1
  # equally, all other letters get 0.0.
  def PPM.from_IUPAC(iupac)
    matrix = {"A" => [], "C" => [], "G" => [], "T" => []}

    (0...iupac.size).each do |i|
      matrix.each_key { |k| matrix[k] << 0.0 }
      # [i, 1] yields a one-character string on every Ruby version.
      letters = IUPAC::REVCODE[iupac[i, 1]]
      (0...letters.size).each do |j|
        matrix[letters[j, 1]][-1] = 1.0/letters.size
      end
    end

    newppm = PPM.new(iupac.size, matrix, 4.0)
    newppm.iupacomp!

    newppm
  end

end
574
- end