sequence_logo 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -65
- data/Rakefile +5 -5
- data/TODO.txt +7 -7
- data/bin/glue_logos +2 -2
- data/bin/sequence_logo +2 -2
- data/lib/sequence_logo/cli.rb +36 -36
- data/lib/sequence_logo/exec/glue_logos.rb +97 -66
- data/lib/sequence_logo/exec/sequence_logo.rb +51 -51
- data/lib/sequence_logo/pmflogo_lib.rb +113 -113
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/infocod.rb +108 -108
- data/lib/sequence_logo/ytilib/iupac.rb +92 -92
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +98 -98
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -85
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/lib/sequence_logo/ytilib.rb +9 -9
- data/lib/sequence_logo.rb +7 -7
- data/sequence_logo.gemspec +21 -21
- data/test/data/pcm/AHR_si.pcm +10 -10
- data/test/data/pcm/AIRE_f2.pcm +19 -19
- metadata +3 -4
@@ -1,75 +1,75 @@
|
|
1
|
-
require 'rexml/formatters/pretty'
|
2
|
-
|
3
|
-
module REXML
|
4
|
-
module Formatters
|
5
|
-
# The Transitive formatter writes an XML document that parses to an
|
6
|
-
# identical document as the source document. This means that no extra
|
7
|
-
# whitespace nodes are inserted, and whitespace within text nodes is
|
8
|
-
# preserved. Within these constraints, the document is pretty-printed,
|
9
|
-
# with whitespace inserted into the metadata to introduce formatting.
|
10
|
-
#
|
11
|
-
# Note that this is only useful if the original XML is not already
|
12
|
-
# formatted. Since this formatter does not alter whitespace nodes, the
|
13
|
-
# results of formatting already formatted XML will be odd.
|
14
|
-
class Transitive < Default
|
15
|
-
def initialize( indentation=2 )
|
16
|
-
@indentation = indentation
|
17
|
-
@level = 0
|
18
|
-
end
|
19
|
-
|
20
|
-
protected
|
21
|
-
def write_element( node, output )
|
22
|
-
output << "\n" << ' '*@level
|
23
|
-
output << "<#{node.expanded_name}"
|
24
|
-
|
25
|
-
node.attributes.each_attribute do |attr|
|
26
|
-
output << " "
|
27
|
-
attr.write( output )
|
28
|
-
end unless node.attributes.empty?
|
29
|
-
|
30
|
-
if node.children.empty?
|
31
|
-
output << "/>"
|
32
|
-
else
|
33
|
-
output << ">"
|
34
|
-
# If compact and all children are text, and if the formatted output
|
35
|
-
# is less than the specified width, then try to print everything on
|
36
|
-
# one line
|
37
|
-
skip = false
|
38
|
-
@level += @indentation
|
39
|
-
|
40
|
-
only_text = true
|
41
|
-
|
42
|
-
node.children.each { |child|
|
43
|
-
only_text = child.is_a?(REXML::Text) && only_text
|
44
|
-
write( child, output )
|
45
|
-
}
|
46
|
-
@level -= @indentation
|
47
|
-
output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
|
52
|
-
def write_text( node, output )
|
53
|
-
output << node.to_s()
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Document
|
59
|
-
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
|
60
|
-
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
61
|
-
output = Output.new( output, xml_decl.encoding )
|
62
|
-
end
|
63
|
-
formatter = if indent > -1
|
64
|
-
if trans
|
65
|
-
REXML::Formatters::Transitive.new( indent )
|
66
|
-
else
|
67
|
-
REXML::Formatters::Pretty.new( indent, ie_hack )
|
68
|
-
end
|
69
|
-
else
|
70
|
-
REXML::Formatters::Default.new( ie_hack )
|
71
|
-
end
|
72
|
-
formatter.write( self, output )
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
1
|
+
require 'rexml/formatters/pretty'
|
2
|
+
|
3
|
+
module REXML
|
4
|
+
module Formatters
|
5
|
+
# The Transitive formatter writes an XML document that parses to an
|
6
|
+
# identical document as the source document. This means that no extra
|
7
|
+
# whitespace nodes are inserted, and whitespace within text nodes is
|
8
|
+
# preserved. Within these constraints, the document is pretty-printed,
|
9
|
+
# with whitespace inserted into the metadata to introduce formatting.
|
10
|
+
#
|
11
|
+
# Note that this is only useful if the original XML is not already
|
12
|
+
# formatted. Since this formatter does not alter whitespace nodes, the
|
13
|
+
# results of formatting already formatted XML will be odd.
|
14
|
+
class Transitive < Default
|
15
|
+
def initialize( indentation=2 )
|
16
|
+
@indentation = indentation
|
17
|
+
@level = 0
|
18
|
+
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
def write_element( node, output )
|
22
|
+
output << "\n" << ' '*@level
|
23
|
+
output << "<#{node.expanded_name}"
|
24
|
+
|
25
|
+
node.attributes.each_attribute do |attr|
|
26
|
+
output << " "
|
27
|
+
attr.write( output )
|
28
|
+
end unless node.attributes.empty?
|
29
|
+
|
30
|
+
if node.children.empty?
|
31
|
+
output << "/>"
|
32
|
+
else
|
33
|
+
output << ">"
|
34
|
+
# If compact and all children are text, and if the formatted output
|
35
|
+
# is less than the specified width, then try to print everything on
|
36
|
+
# one line
|
37
|
+
skip = false
|
38
|
+
@level += @indentation
|
39
|
+
|
40
|
+
only_text = true
|
41
|
+
|
42
|
+
node.children.each { |child|
|
43
|
+
only_text = child.is_a?(REXML::Text) && only_text
|
44
|
+
write( child, output )
|
45
|
+
}
|
46
|
+
@level -= @indentation
|
47
|
+
output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
def write_text( node, output )
|
53
|
+
output << node.to_s()
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class Document
|
59
|
+
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
|
60
|
+
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
61
|
+
output = Output.new( output, xml_decl.encoding )
|
62
|
+
end
|
63
|
+
formatter = if indent > -1
|
64
|
+
if trans
|
65
|
+
REXML::Formatters::Transitive.new( indent )
|
66
|
+
else
|
67
|
+
REXML::Formatters::Pretty.new( indent, ie_hack )
|
68
|
+
end
|
69
|
+
else
|
70
|
+
REXML::Formatters::Default.new( ie_hack )
|
71
|
+
end
|
72
|
+
formatter.write( self, output )
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -1,108 +1,108 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
class Float
|
4
|
-
# Using Stieltjes formula from http://www.luschny.de/math/factorial/approx/SimpleCases.html
|
5
|
-
def log_fact
|
6
|
-
return 0.0 if self <= 1
|
7
|
-
a0 = 1.0/12
|
8
|
-
a1 = 1.0/30
|
9
|
-
a2 = 53.0/210
|
10
|
-
a3 = 195.0/371
|
11
|
-
a4 = 22999.0/22737
|
12
|
-
a5 = 29944523.0/19733142
|
13
|
-
a6 = 109535241009.0/48264275462
|
14
|
-
z_big = self+1;
|
15
|
-
(1.0/2)*Math.log(2*Math::PI)+(z_big-1.0/2)*Math.log(z_big)-z_big + a0/(z_big+a1/(z_big+a2/(z_big+a3/(z_big+a4/(z_big+a5/(z_big+a6/z_big))))))
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
class Integer
|
20
|
-
def log_fact
|
21
|
-
self.to_f.log_fact
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
# Naive version
|
26
|
-
=begin
|
27
|
-
class Integer
|
28
|
-
@@fact_hash = {}
|
29
|
-
def log_fact
|
30
|
-
return 0.0 if self == 0
|
31
|
-
return nil if self < 0
|
32
|
-
if self <= 170
|
33
|
-
@@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
|
34
|
-
else
|
35
|
-
return self.to_f.log_fact
|
36
|
-
end unless @@fact_hash.has_key?(self)
|
37
|
-
return @@fact_hash[self]
|
38
|
-
end
|
39
|
-
end
|
40
|
-
=end
|
41
|
-
|
42
|
-
module Ytilib
|
43
|
-
class PM
|
44
|
-
def infocod(position = nil)
|
45
|
-
return infocod_private(position) if position
|
46
|
-
(0...@size).collect { |i| infocod_private(i) }
|
47
|
-
end
|
48
|
-
alias icd infocod
|
49
|
-
|
50
|
-
def icd2of4(floor = false)
|
51
|
-
i2o4 = @words_count / 2.0
|
52
|
-
i2o4 = i2o4.floor if floor
|
53
|
-
([i2o4, i2o4, 0, 0].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
54
|
-
# 0 is equal to @words_count % 2, because 0! = 1!
|
55
|
-
end
|
56
|
-
|
57
|
-
def icd3of4(floor = false)
|
58
|
-
i3o4 = @words_count / 3.0
|
59
|
-
i3o4 = i3o4.floor if floor
|
60
|
-
addon = floor ? @words_count % 3 : 0
|
61
|
-
([i3o4, i3o4, i3o4, addon].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
62
|
-
end
|
63
|
-
|
64
|
-
def icdThc
|
65
|
-
icd3of4
|
66
|
-
end
|
67
|
-
|
68
|
-
def icdTlc
|
69
|
-
io = @words_count / 6.0
|
70
|
-
([2*io, 2*io, io, io].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
71
|
-
end
|
72
|
-
|
73
|
-
def icd4of4(floor = false)
|
74
|
-
i4o4 = @words_count / 4.0
|
75
|
-
i4o4 = i4o4.floor if floor
|
76
|
-
([i4o4, i4o4, i4o4, i4o4].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
77
|
-
end
|
78
|
-
|
79
|
-
protected
|
80
|
-
def infocod_private(position)
|
81
|
-
k_i = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
|
82
|
-
( k_i.inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
class PPM
|
87
|
-
def to_pcm(words_count = nil)
|
88
|
-
@words_count = words_count if words_count
|
89
|
-
checkerr("words count is not specified") { !@words_count }
|
90
|
-
counts = PM.new_matrix(@size)
|
91
|
-
(0...size).each { |i|
|
92
|
-
['A', 'C', 'G', 'T'].each { |l|
|
93
|
-
counts[l][i] = @matrix[l][i] * @words_count
|
94
|
-
}
|
95
|
-
}
|
96
|
-
return PM.new(size, counts)
|
97
|
-
end
|
98
|
-
alias to_pcm get_pcm
|
99
|
-
|
100
|
-
def infocod(position = nil)
|
101
|
-
return to_pcm.infocod(position)
|
102
|
-
end
|
103
|
-
|
104
|
-
def icd(position = nil)
|
105
|
-
return to_pcm.infocod(position)
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
class Float
|
4
|
+
# Using Stieltjes formula from http://www.luschny.de/math/factorial/approx/SimpleCases.html
|
5
|
+
def log_fact
|
6
|
+
return 0.0 if self <= 1
|
7
|
+
a0 = 1.0/12
|
8
|
+
a1 = 1.0/30
|
9
|
+
a2 = 53.0/210
|
10
|
+
a3 = 195.0/371
|
11
|
+
a4 = 22999.0/22737
|
12
|
+
a5 = 29944523.0/19733142
|
13
|
+
a6 = 109535241009.0/48264275462
|
14
|
+
z_big = self+1;
|
15
|
+
(1.0/2)*Math.log(2*Math::PI)+(z_big-1.0/2)*Math.log(z_big)-z_big + a0/(z_big+a1/(z_big+a2/(z_big+a3/(z_big+a4/(z_big+a5/(z_big+a6/z_big))))))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class Integer
|
20
|
+
def log_fact
|
21
|
+
self.to_f.log_fact
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Naive version
|
26
|
+
=begin
|
27
|
+
class Integer
|
28
|
+
@@fact_hash = {}
|
29
|
+
def log_fact
|
30
|
+
return 0.0 if self == 0
|
31
|
+
return nil if self < 0
|
32
|
+
if self <= 170
|
33
|
+
@@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
|
34
|
+
else
|
35
|
+
return self.to_f.log_fact
|
36
|
+
end unless @@fact_hash.has_key?(self)
|
37
|
+
return @@fact_hash[self]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
=end
|
41
|
+
|
42
|
+
module Ytilib
|
43
|
+
class PM
|
44
|
+
def infocod(position = nil)
|
45
|
+
return infocod_private(position) if position
|
46
|
+
(0...@size).collect { |i| infocod_private(i) }
|
47
|
+
end
|
48
|
+
alias icd infocod
|
49
|
+
|
50
|
+
def icd2of4(floor = false)
|
51
|
+
i2o4 = @words_count / 2.0
|
52
|
+
i2o4 = i2o4.floor if floor
|
53
|
+
([i2o4, i2o4, 0, 0].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
54
|
+
# 0 is equal to @words_count % 2, because 0! = 1!
|
55
|
+
end
|
56
|
+
|
57
|
+
def icd3of4(floor = false)
|
58
|
+
i3o4 = @words_count / 3.0
|
59
|
+
i3o4 = i3o4.floor if floor
|
60
|
+
addon = floor ? @words_count % 3 : 0
|
61
|
+
([i3o4, i3o4, i3o4, addon].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
62
|
+
end
|
63
|
+
|
64
|
+
def icdThc
|
65
|
+
icd3of4
|
66
|
+
end
|
67
|
+
|
68
|
+
def icdTlc
|
69
|
+
io = @words_count / 6.0
|
70
|
+
([2*io, 2*io, io, io].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
71
|
+
end
|
72
|
+
|
73
|
+
def icd4of4(floor = false)
|
74
|
+
i4o4 = @words_count / 4.0
|
75
|
+
i4o4 = i4o4.floor if floor
|
76
|
+
([i4o4, i4o4, i4o4, i4o4].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
77
|
+
end
|
78
|
+
|
79
|
+
protected
|
80
|
+
def infocod_private(position)
|
81
|
+
k_i = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
|
82
|
+
( k_i.inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class PPM
|
87
|
+
def to_pcm(words_count = nil)
|
88
|
+
@words_count = words_count if words_count
|
89
|
+
checkerr("words count is not specified") { !@words_count }
|
90
|
+
counts = PM.new_matrix(@size)
|
91
|
+
(0...size).each { |i|
|
92
|
+
['A', 'C', 'G', 'T'].each { |l|
|
93
|
+
counts[l][i] = @matrix[l][i] * @words_count
|
94
|
+
}
|
95
|
+
}
|
96
|
+
return PM.new(size, counts)
|
97
|
+
end
|
98
|
+
alias to_pcm get_pcm
|
99
|
+
|
100
|
+
def infocod(position = nil)
|
101
|
+
return to_pcm.infocod(position)
|
102
|
+
end
|
103
|
+
|
104
|
+
def icd(position = nil)
|
105
|
+
return to_pcm.infocod(position)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -1,92 +1,92 @@
|
|
1
|
-
class IUPAC < String
|
2
|
-
CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
|
3
|
-
"AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
|
4
|
-
"CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
|
5
|
-
REVCODE = CODE.invert
|
6
|
-
|
7
|
-
def dup
|
8
|
-
IUPAC.new(self)
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(words)
|
12
|
-
if words.is_a?(Array)
|
13
|
-
iupac = (0...words[0].size).collect { |i|
|
14
|
-
(0...words.size).collect { |j| words[j][i,1] }.uniq.sort.inject("") { |cola, letter| cola += letter }
|
15
|
-
}.inject("") { |iup, cola|
|
16
|
-
checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
|
17
|
-
iup += CODE[cola]
|
18
|
-
}
|
19
|
-
super(iupac)
|
20
|
-
elsif words.is_a?(IUPAC)
|
21
|
-
super(words)
|
22
|
-
elsif words.is_a?(String)
|
23
|
-
checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
|
24
|
-
super(words)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def ==(iupac)
|
29
|
-
return false if self.size != iupac.size
|
30
|
-
(0...self.size).inject(true) { |result, i| result &= IUPACOM[self[i,1]][iupac[i,1]] }
|
31
|
-
end
|
32
|
-
|
33
|
-
def merge(iupac)
|
34
|
-
return nil if self.size != iupac.size
|
35
|
-
res = (0...self.size).inject("") { |res, i|
|
36
|
-
merges = REVCODE[self[i,1]].split(//).concat(REVCODE[iupac[i,1]].split(//)).uniq.sort.inject("") { |s, c| s += c}
|
37
|
-
res << CODE[merges]
|
38
|
-
}
|
39
|
-
return IUPAC.new(res)
|
40
|
-
end
|
41
|
-
|
42
|
-
def include?(iupac)
|
43
|
-
return false if self.size < iupac.size || !iupac.is_a?(IUPAC)
|
44
|
-
(0..self.size-iupac.size).each { |i|
|
45
|
-
return i if IUPAC.new(self[i,iupac.size]) == iupac
|
46
|
-
}
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
|
50
|
-
def compl
|
51
|
-
return self.tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
52
|
-
end
|
53
|
-
|
54
|
-
def compl!
|
55
|
-
self.tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
56
|
-
return self
|
57
|
-
end
|
58
|
-
|
59
|
-
alias reverse_string reverse
|
60
|
-
def reverse
|
61
|
-
return IUPAC.new(reverse_string)
|
62
|
-
end
|
63
|
-
|
64
|
-
alias comp! compl!
|
65
|
-
alias complement! compl!
|
66
|
-
alias comp compl
|
67
|
-
alias complement compl
|
68
|
-
|
69
|
-
private
|
70
|
-
IUPACOM = { "A" => {"A" => :llib, "R" => :llib, "M" => :llib, "W" => :llib, "D" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
71
|
-
"C" => {"C" => :llib, "Y" => :llib, "M" => :llib, "S" => :llib, "B" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
72
|
-
"G" => {"G" => :llib, "R" => :llib, "K" => :llib, "S" => :llib, "B" => :llib, "D" => :llib, "V" => :llib, "N" => :llib},
|
73
|
-
"T" => {"T" => :llib, "Y" => :llib, "K" => :llib, "W" => :llib, "B" => :llib, "D" => :llib, "H" => :llib, "N" => :llib}
|
74
|
-
}
|
75
|
-
IUPACOM["R"] = IUPACOM["G"].merge(IUPACOM["A"])
|
76
|
-
IUPACOM["Y"] = IUPACOM["T"].merge(IUPACOM["C"])
|
77
|
-
IUPACOM["K"] = IUPACOM["G"].merge(IUPACOM["T"])
|
78
|
-
IUPACOM["M"] = IUPACOM["A"].merge(IUPACOM["C"])
|
79
|
-
IUPACOM["S"] = IUPACOM["G"].merge(IUPACOM["C"])
|
80
|
-
IUPACOM["W"] = IUPACOM["A"].merge(IUPACOM["T"])
|
81
|
-
IUPACOM["B"] = IUPACOM["G"].merge(IUPACOM["T"].merge(IUPACOM["C"]))
|
82
|
-
IUPACOM["D"] = IUPACOM["G"].merge(IUPACOM["A"].merge(IUPACOM["T"]))
|
83
|
-
IUPACOM["H"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["T"]))
|
84
|
-
IUPACOM["V"] = IUPACOM["G"].merge(IUPACOM["C"].merge(IUPACOM["A"]))
|
85
|
-
IUPACOM["N"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["G"].merge(IUPACOM["T"])))
|
86
|
-
|
87
|
-
# IUPACMERGE = CODE.merge({
|
88
|
-
# "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
|
89
|
-
#
|
90
|
-
# })
|
91
|
-
|
92
|
-
end
|
1
|
+
class IUPAC < String
|
2
|
+
CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
|
3
|
+
"AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
|
4
|
+
"CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
|
5
|
+
REVCODE = CODE.invert
|
6
|
+
|
7
|
+
def dup
|
8
|
+
IUPAC.new(self)
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(words)
|
12
|
+
if words.is_a?(Array)
|
13
|
+
iupac = (0...words[0].size).collect { |i|
|
14
|
+
(0...words.size).collect { |j| words[j][i,1] }.uniq.sort.inject("") { |cola, letter| cola += letter }
|
15
|
+
}.inject("") { |iup, cola|
|
16
|
+
checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
|
17
|
+
iup += CODE[cola]
|
18
|
+
}
|
19
|
+
super(iupac)
|
20
|
+
elsif words.is_a?(IUPAC)
|
21
|
+
super(words)
|
22
|
+
elsif words.is_a?(String)
|
23
|
+
checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
|
24
|
+
super(words)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def ==(iupac)
|
29
|
+
return false if self.size != iupac.size
|
30
|
+
(0...self.size).inject(true) { |result, i| result &= IUPACOM[self[i,1]][iupac[i,1]] }
|
31
|
+
end
|
32
|
+
|
33
|
+
def merge(iupac)
|
34
|
+
return nil if self.size != iupac.size
|
35
|
+
res = (0...self.size).inject("") { |res, i|
|
36
|
+
merges = REVCODE[self[i,1]].split(//).concat(REVCODE[iupac[i,1]].split(//)).uniq.sort.inject("") { |s, c| s += c}
|
37
|
+
res << CODE[merges]
|
38
|
+
}
|
39
|
+
return IUPAC.new(res)
|
40
|
+
end
|
41
|
+
|
42
|
+
def include?(iupac)
|
43
|
+
return false if self.size < iupac.size || !iupac.is_a?(IUPAC)
|
44
|
+
(0..self.size-iupac.size).each { |i|
|
45
|
+
return i if IUPAC.new(self[i,iupac.size]) == iupac
|
46
|
+
}
|
47
|
+
return false
|
48
|
+
end
|
49
|
+
|
50
|
+
def compl
|
51
|
+
return self.tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
52
|
+
end
|
53
|
+
|
54
|
+
def compl!
|
55
|
+
self.tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
56
|
+
return self
|
57
|
+
end
|
58
|
+
|
59
|
+
alias reverse_string reverse
|
60
|
+
def reverse
|
61
|
+
return IUPAC.new(reverse_string)
|
62
|
+
end
|
63
|
+
|
64
|
+
alias comp! compl!
|
65
|
+
alias complement! compl!
|
66
|
+
alias comp compl
|
67
|
+
alias complement compl
|
68
|
+
|
69
|
+
private
|
70
|
+
IUPACOM = { "A" => {"A" => :llib, "R" => :llib, "M" => :llib, "W" => :llib, "D" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
71
|
+
"C" => {"C" => :llib, "Y" => :llib, "M" => :llib, "S" => :llib, "B" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
72
|
+
"G" => {"G" => :llib, "R" => :llib, "K" => :llib, "S" => :llib, "B" => :llib, "D" => :llib, "V" => :llib, "N" => :llib},
|
73
|
+
"T" => {"T" => :llib, "Y" => :llib, "K" => :llib, "W" => :llib, "B" => :llib, "D" => :llib, "H" => :llib, "N" => :llib}
|
74
|
+
}
|
75
|
+
IUPACOM["R"] = IUPACOM["G"].merge(IUPACOM["A"])
|
76
|
+
IUPACOM["Y"] = IUPACOM["T"].merge(IUPACOM["C"])
|
77
|
+
IUPACOM["K"] = IUPACOM["G"].merge(IUPACOM["T"])
|
78
|
+
IUPACOM["M"] = IUPACOM["A"].merge(IUPACOM["C"])
|
79
|
+
IUPACOM["S"] = IUPACOM["G"].merge(IUPACOM["C"])
|
80
|
+
IUPACOM["W"] = IUPACOM["A"].merge(IUPACOM["T"])
|
81
|
+
IUPACOM["B"] = IUPACOM["G"].merge(IUPACOM["T"].merge(IUPACOM["C"]))
|
82
|
+
IUPACOM["D"] = IUPACOM["G"].merge(IUPACOM["A"].merge(IUPACOM["T"]))
|
83
|
+
IUPACOM["H"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["T"]))
|
84
|
+
IUPACOM["V"] = IUPACOM["G"].merge(IUPACOM["C"].merge(IUPACOM["A"]))
|
85
|
+
IUPACOM["N"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["G"].merge(IUPACOM["T"])))
|
86
|
+
|
87
|
+
# IUPACMERGE = CODE.merge({
|
88
|
+
# "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
|
89
|
+
#
|
90
|
+
# })
|
91
|
+
|
92
|
+
end
|