sequence_logo 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -65
- data/Rakefile +5 -5
- data/TODO.txt +7 -7
- data/bin/glue_logos +2 -2
- data/bin/sequence_logo +2 -2
- data/lib/sequence_logo/cli.rb +36 -36
- data/lib/sequence_logo/exec/glue_logos.rb +97 -66
- data/lib/sequence_logo/exec/sequence_logo.rb +51 -51
- data/lib/sequence_logo/pmflogo_lib.rb +113 -113
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/infocod.rb +108 -108
- data/lib/sequence_logo/ytilib/iupac.rb +92 -92
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +98 -98
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -85
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/lib/sequence_logo/ytilib.rb +9 -9
- data/lib/sequence_logo.rb +7 -7
- data/sequence_logo.gemspec +21 -21
- data/test/data/pcm/AHR_si.pcm +10 -10
- data/test/data/pcm/AIRE_f2.pcm +19 -19
- metadata +3 -4
@@ -1,75 +1,75 @@
|
|
1
|
-
require 'rexml/formatters/pretty'
|
2
|
-
|
3
|
-
module REXML
|
4
|
-
module Formatters
|
5
|
-
# The Transitive formatter writes an XML document that parses to an
|
6
|
-
# identical document as the source document. This means that no extra
|
7
|
-
# whitespace nodes are inserted, and whitespace within text nodes is
|
8
|
-
# preserved. Within these constraints, the document is pretty-printed,
|
9
|
-
# with whitespace inserted into the metadata to introduce formatting.
|
10
|
-
#
|
11
|
-
# Note that this is only useful if the original XML is not already
|
12
|
-
# formatted. Since this formatter does not alter whitespace nodes, the
|
13
|
-
# results of formatting already formatted XML will be odd.
|
14
|
-
class Transitive < Default
|
15
|
-
def initialize( indentation=2 )
|
16
|
-
@indentation = indentation
|
17
|
-
@level = 0
|
18
|
-
end
|
19
|
-
|
20
|
-
protected
|
21
|
-
def write_element( node, output )
|
22
|
-
output << "\n" << ' '*@level
|
23
|
-
output << "<#{node.expanded_name}"
|
24
|
-
|
25
|
-
node.attributes.each_attribute do |attr|
|
26
|
-
output << " "
|
27
|
-
attr.write( output )
|
28
|
-
end unless node.attributes.empty?
|
29
|
-
|
30
|
-
if node.children.empty?
|
31
|
-
output << "/>"
|
32
|
-
else
|
33
|
-
output << ">"
|
34
|
-
# If compact and all children are text, and if the formatted output
|
35
|
-
# is less than the specified width, then try to print everything on
|
36
|
-
# one line
|
37
|
-
skip = false
|
38
|
-
@level += @indentation
|
39
|
-
|
40
|
-
only_text = true
|
41
|
-
|
42
|
-
node.children.each { |child|
|
43
|
-
only_text = child.is_a?(REXML::Text) && only_text
|
44
|
-
write( child, output )
|
45
|
-
}
|
46
|
-
@level -= @indentation
|
47
|
-
output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
|
52
|
-
def write_text( node, output )
|
53
|
-
output << node.to_s()
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Document
|
59
|
-
def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
|
60
|
-
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
|
61
|
-
output = Output.new( output, xml_decl.encoding )
|
62
|
-
end
|
63
|
-
formatter = if indent > -1
|
64
|
-
if trans
|
65
|
-
REXML::Formatters::Transitive.new( indent )
|
66
|
-
else
|
67
|
-
REXML::Formatters::Pretty.new( indent, ie_hack )
|
68
|
-
end
|
69
|
-
else
|
70
|
-
REXML::Formatters::Default.new( ie_hack )
|
71
|
-
end
|
72
|
-
formatter.write( self, output )
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
1
|
+
require 'rexml/formatters/pretty'
|
2
|
+
|
3
|
+
module REXML
  module Formatters
    # Formatter selected by REXML::Document#write (below) when an indentation
    # is given together with the +trans+ flag.
    #
    # Every element starts on a fresh line, indented by the current nesting
    # level. Text nodes are written verbatim via +node.to_s+. When all
    # children of an element are text nodes the closing tag follows the text
    # directly; otherwise the closing tag is placed on its own indented line.
    class Transitive < Default
      # indentation:: number of spaces added per nesting level (default 2).
      def initialize( indentation=2 )
        @indentation = indentation
        @level = 0
      end

      protected

      # Writes the element +node+, its attributes and (recursively) its
      # children to +output+.
      def write_element( node, output )
        output << "\n" << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << "/>"
        else
          output << ">"
          @level += @indentation

          # Remember whether any non-text child was seen: only then does the
          # closing tag go on a line of its own.
          only_text = true
          node.children.each { |child|
            only_text &&= child.is_a?(REXML::Text)
            write( child, output )
          }

          @level -= @indentation
          output << "\n" << ' '*@level unless only_text
          output << "</#{node.expanded_name}>"
        end
      end

      # Writes a text node without altering its whitespace.
      def write_text( node, output )
        output << node.to_s()
      end
    end
  end

  class Document
    # Replacement for REXML::Document#write that routes "transitive" output
    # through the Transitive formatter defined above.
    #
    # Positional form (unchanged):
    #   write( output=$stdout, indent=-1, trans=false, ie_hack=false )
    # Options-hash form, as accepted by the stock REXML method:
    #   write( :output => io, :indent => 2, :transitive => true,
    #          :ie_hack => false, :encoding => "UTF-8" )
    #
    # output::  object responding to <<; nil falls back to $stdout.
    # indent::  spaces per level; -1 (or nil) means no pretty-printing.
    # trans::   when true (and indent > -1) use Formatters::Transitive,
    #           otherwise Formatters::Pretty.
    # ie_hack:: passed through to the Pretty / Default formatters.
    def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
      encoding = nil
      if output.is_a?(Hash)
        # Options-hash call: a Hash is never a valid output target, so
        # unpack it the way the stock implementation does.
        options  = output
        output   = options[:output]
        indent   = options[:indent]     if options.key?(:indent)
        trans    = options[:transitive] if options.key?(:transitive)
        ie_hack  = options[:ie_hack]    if options.key?(:ie_hack)
        encoding = options[:encoding]
      end
      output   ||= $stdout
      indent   ||= -1
      encoding ||= xml_decl.encoding

      # Wrap the target so non-UTF-8 documents are transcoded on write.
      if encoding != "UTF-8" && !output.kind_of?(Output)
        output = Output.new( output, encoding )
      end

      formatter = if indent > -1
          if trans
            REXML::Formatters::Transitive.new( indent )
          else
            REXML::Formatters::Pretty.new( indent, ie_hack )
          end
        else
          REXML::Formatters::Default.new( ie_hack )
        end
      formatter.write( self, output )
    end
  end
end
|
@@ -1,108 +1,108 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
class Float
|
4
|
-
# Using Stieltjes formula from http://www.luschny.de/math/factorial/approx/SimpleCases.html
|
5
|
-
def log_fact
|
6
|
-
return 0.0 if self <= 1
|
7
|
-
a0 = 1.0/12
|
8
|
-
a1 = 1.0/30
|
9
|
-
a2 = 53.0/210
|
10
|
-
a3 = 195.0/371
|
11
|
-
a4 = 22999.0/22737
|
12
|
-
a5 = 29944523.0/19733142
|
13
|
-
a6 = 109535241009.0/48264275462
|
14
|
-
z_big = self+1;
|
15
|
-
(1.0/2)*Math.log(2*Math::PI)+(z_big-1.0/2)*Math.log(z_big)-z_big + a0/(z_big+a1/(z_big+a2/(z_big+a3/(z_big+a4/(z_big+a5/(z_big+a6/z_big))))))
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
class Integer
|
20
|
-
def log_fact
|
21
|
-
self.to_f.log_fact
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
# Naive version
|
26
|
-
=begin
|
27
|
-
class Integer
|
28
|
-
@@fact_hash = {}
|
29
|
-
def log_fact
|
30
|
-
return 0.0 if self == 0
|
31
|
-
return nil if self < 0
|
32
|
-
if self <= 170
|
33
|
-
@@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
|
34
|
-
else
|
35
|
-
return self.to_f.log_fact
|
36
|
-
end unless @@fact_hash.has_key?(self)
|
37
|
-
return @@fact_hash[self]
|
38
|
-
end
|
39
|
-
end
|
40
|
-
=end
|
41
|
-
|
42
|
-
module Ytilib
|
43
|
-
class PM
|
44
|
-
def infocod(position = nil)
|
45
|
-
return infocod_private(position) if position
|
46
|
-
(0...@size).collect { |i| infocod_private(i) }
|
47
|
-
end
|
48
|
-
alias icd infocod
|
49
|
-
|
50
|
-
def icd2of4(floor = false)
|
51
|
-
i2o4 = @words_count / 2.0
|
52
|
-
i2o4 = i2o4.floor if floor
|
53
|
-
([i2o4, i2o4, 0, 0].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
54
|
-
# 0 is equal to @words_count % 2, because 0! = 1!
|
55
|
-
end
|
56
|
-
|
57
|
-
def icd3of4(floor = false)
|
58
|
-
i3o4 = @words_count / 3.0
|
59
|
-
i3o4 = i3o4.floor if floor
|
60
|
-
addon = floor ? @words_count % 3 : 0
|
61
|
-
([i3o4, i3o4, i3o4, addon].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
62
|
-
end
|
63
|
-
|
64
|
-
def icdThc
|
65
|
-
icd3of4
|
66
|
-
end
|
67
|
-
|
68
|
-
def icdTlc
|
69
|
-
io = @words_count / 6.0
|
70
|
-
([2*io, 2*io, io, io].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
71
|
-
end
|
72
|
-
|
73
|
-
def icd4of4(floor = false)
|
74
|
-
i4o4 = @words_count / 4.0
|
75
|
-
i4o4 = i4o4.floor if floor
|
76
|
-
([i4o4, i4o4, i4o4, i4o4].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
77
|
-
end
|
78
|
-
|
79
|
-
protected
|
80
|
-
def infocod_private(position)
|
81
|
-
k_i = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
|
82
|
-
( k_i.inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
class PPM
|
87
|
-
def to_pcm(words_count = nil)
|
88
|
-
@words_count = words_count if words_count
|
89
|
-
checkerr("words count is not specified") { !@words_count }
|
90
|
-
counts = PM.new_matrix(@size)
|
91
|
-
(0...size).each { |i|
|
92
|
-
['A', 'C', 'G', 'T'].each { |l|
|
93
|
-
counts[l][i] = @matrix[l][i] * @words_count
|
94
|
-
}
|
95
|
-
}
|
96
|
-
return PM.new(size, counts)
|
97
|
-
end
|
98
|
-
alias to_pcm get_pcm
|
99
|
-
|
100
|
-
def infocod(position = nil)
|
101
|
-
return to_pcm.infocod(position)
|
102
|
-
end
|
103
|
-
|
104
|
-
def icd(position = nil)
|
105
|
-
return to_pcm.infocod(position)
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
class Float
  # Coefficients a0..a6 of the continued fraction in the Stieltjes formula,
  # see http://www.luschny.de/math/factorial/approx/SimpleCases.html
  STIELTJES_LOG_FACT_COEFFS = [
    1.0/12,
    1.0/30,
    53.0/210,
    195.0/371,
    22999.0/22737,
    29944523.0/19733142,
    109535241009.0/48264275462
  ].freeze

  # Approximates the natural logarithm of the factorial of +self+ with the
  # Stieltjes formula, evaluated at z = self + 1.
  #
  # Returns 0.0 for every value <= 1 (this includes negative numbers and
  # fractions below one), otherwise a Float.
  def log_fact
    return 0.0 if self <= 1

    z_big = self + 1

    # Evaluate a0/(z + a1/(z + ... a6/z)) from the innermost term outwards.
    fraction = 0.0
    STIELTJES_LOG_FACT_COEFFS.reverse_each { |a| fraction = a / (z_big + fraction) }

    (1.0/2)*Math.log(2*Math::PI) + (z_big - 1.0/2)*Math.log(z_big) - z_big + fraction
  end
end

class Integer
  # Same as Float#log_fact, applied to the integer converted to a Float.
  def log_fact
    to_f.log_fact
  end
end
|
24
|
+
|
25
|
+
# Naive version
|
26
|
+
=begin
|
27
|
+
class Integer
|
28
|
+
@@fact_hash = {}
|
29
|
+
def log_fact
|
30
|
+
return 0.0 if self == 0
|
31
|
+
return nil if self < 0
|
32
|
+
if self <= 170
|
33
|
+
@@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
|
34
|
+
else
|
35
|
+
return self.to_f.log_fact
|
36
|
+
end unless @@fact_hash.has_key?(self)
|
37
|
+
return @@fact_hash[self]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
=end
|
41
|
+
|
42
|
+
module Ytilib
|
43
|
+
class PM
  # Per-position value computed from the matrix columns as
  #   (sum of log_fact over the A/C/G/T entries - log_fact(@words_count)) / @words_count
  #
  # position:: column index; when omitted (nil) the value is computed for
  #            every column 0...@size and an Array is returned.
  def infocod(position = nil)
    return infocod_private(position) if position
    (0...@size).collect { |i| infocod_private(i) }
  end
  alias icd infocod

  # Same formula for a column where two letters share @words_count equally
  # and the other two are 0.
  # floor:: when true the share is rounded down; the remainder is not added
  #         back because 0! = 1!.
  def icd2of4(floor = false)
    i2o4 = @words_count / 2.0
    i2o4 = i2o4.floor if floor
    icd_of_counts([i2o4, i2o4, 0, 0])
  end

  # Same formula for a column where three letters share @words_count equally.
  # floor:: when true the share is rounded down and the remainder
  #         (@words_count % 3) is used as the fourth entry.
  def icd3of4(floor = false)
    i3o4 = @words_count / 3.0
    i3o4 = i3o4.floor if floor
    addon = floor ? @words_count % 3 : 0
    icd_of_counts([i3o4, i3o4, i3o4, addon])
  end

  # Shortcut for icd3of4 without flooring.
  def icdThc
    icd3of4
  end

  # Same formula for a column split 2/6, 2/6, 1/6, 1/6 of @words_count.
  def icdTlc
    io = @words_count / 6.0
    icd_of_counts([2*io, 2*io, io, io])
  end

  # Same formula for a column where all four letters share @words_count equally.
  # floor:: when true the share is rounded down.
  # NOTE(review): unlike icd3of4 the remainder (@words_count % 4) is dropped
  # here, so the four entries may not add up to @words_count - confirm this
  # is intended.
  def icd4of4(floor = false)
    i4o4 = @words_count / 4.0
    i4o4 = i4o4.floor if floor
    icd_of_counts([i4o4, i4o4, i4o4, i4o4])
  end

  protected

  # Value of the formula for the matrix column at +position+.
  def infocod_private(position)
    counts = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
    icd_of_counts(counts)
  end

  # Shared formula: (sum of log_fact(count) - log_fact(@words_count)) / @words_count.
  # counts:: the four per-letter entries; each must respond to log_fact.
  def icd_of_counts(counts)
    (counts.inject(0.0) { |sum, count| sum + count.log_fact } - @words_count.log_fact) / @words_count
  end
end
|
85
|
+
|
86
|
+
class PPM
  # Builds a new PM whose A/C/G/T entries are the entries of @matrix
  # multiplied by the word count.
  #
  # words_count:: when given, it is stored in @words_count before use.
  # checkerr is called with a block that is true when no word count is
  # known (neither passed in nor stored).
  def to_pcm(words_count = nil)
    @words_count = words_count if words_count
    checkerr("words count is not specified") { !@words_count }

    counts = PM.new_matrix(@size)
    (0...size).each do |pos|
      ['A', 'C', 'G', 'T'].each do |letter|
        counts[letter][pos] = @matrix[letter][pos] * @words_count
      end
    end
    PM.new(size, counts)
  end
  # NOTE(review): `alias new old` - this line rebinds to_pcm to an existing
  # get_pcm, replacing the method defined just above. get_pcm is not
  # defined in this file; confirm whether `alias get_pcm to_pcm` was meant.
  alias to_pcm get_pcm

  # Delegates to the infocod of the matrix returned by to_pcm.
  def infocod(position = nil)
    to_pcm.infocod(position)
  end

  # Same delegation as infocod.
  def icd(position = nil)
    to_pcm.infocod(position)
  end
end
|
108
|
+
end
|
@@ -1,92 +1,92 @@
|
|
1
|
-
class IUPAC < String
|
2
|
-
CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
|
3
|
-
"AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
|
4
|
-
"CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
|
5
|
-
REVCODE = CODE.invert
|
6
|
-
|
7
|
-
def dup
|
8
|
-
IUPAC.new(self)
|
9
|
-
end
|
10
|
-
|
11
|
-
def initialize(words)
|
12
|
-
if words.is_a?(Array)
|
13
|
-
iupac = (0...words[0].size).collect { |i|
|
14
|
-
(0...words.size).collect { |j| words[j][i,1] }.uniq.sort.inject("") { |cola, letter| cola += letter }
|
15
|
-
}.inject("") { |iup, cola|
|
16
|
-
checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
|
17
|
-
iup += CODE[cola]
|
18
|
-
}
|
19
|
-
super(iupac)
|
20
|
-
elsif words.is_a?(IUPAC)
|
21
|
-
super(words)
|
22
|
-
elsif words.is_a?(String)
|
23
|
-
checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
|
24
|
-
super(words)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def ==(iupac)
|
29
|
-
return false if self.size != iupac.size
|
30
|
-
(0...self.size).inject(true) { |result, i| result &= IUPACOM[self[i,1]][iupac[i,1]] }
|
31
|
-
end
|
32
|
-
|
33
|
-
def merge(iupac)
|
34
|
-
return nil if self.size != iupac.size
|
35
|
-
res = (0...self.size).inject("") { |res, i|
|
36
|
-
merges = REVCODE[self[i,1]].split(//).concat(REVCODE[iupac[i,1]].split(//)).uniq.sort.inject("") { |s, c| s += c}
|
37
|
-
res << CODE[merges]
|
38
|
-
}
|
39
|
-
return IUPAC.new(res)
|
40
|
-
end
|
41
|
-
|
42
|
-
def include?(iupac)
|
43
|
-
return false if self.size < iupac.size || !iupac.is_a?(IUPAC)
|
44
|
-
(0..self.size-iupac.size).each { |i|
|
45
|
-
return i if IUPAC.new(self[i,iupac.size]) == iupac
|
46
|
-
}
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
|
50
|
-
def compl
|
51
|
-
return self.tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
52
|
-
end
|
53
|
-
|
54
|
-
def compl!
|
55
|
-
self.tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
|
56
|
-
return self
|
57
|
-
end
|
58
|
-
|
59
|
-
alias reverse_string reverse
|
60
|
-
def reverse
|
61
|
-
return IUPAC.new(reverse_string)
|
62
|
-
end
|
63
|
-
|
64
|
-
alias comp! compl!
|
65
|
-
alias complement! compl!
|
66
|
-
alias comp compl
|
67
|
-
alias complement compl
|
68
|
-
|
69
|
-
private
|
70
|
-
IUPACOM = { "A" => {"A" => :llib, "R" => :llib, "M" => :llib, "W" => :llib, "D" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
71
|
-
"C" => {"C" => :llib, "Y" => :llib, "M" => :llib, "S" => :llib, "B" => :llib, "H" => :llib, "V" => :llib, "N" => :llib},
|
72
|
-
"G" => {"G" => :llib, "R" => :llib, "K" => :llib, "S" => :llib, "B" => :llib, "D" => :llib, "V" => :llib, "N" => :llib},
|
73
|
-
"T" => {"T" => :llib, "Y" => :llib, "K" => :llib, "W" => :llib, "B" => :llib, "D" => :llib, "H" => :llib, "N" => :llib}
|
74
|
-
}
|
75
|
-
IUPACOM["R"] = IUPACOM["G"].merge(IUPACOM["A"])
|
76
|
-
IUPACOM["Y"] = IUPACOM["T"].merge(IUPACOM["C"])
|
77
|
-
IUPACOM["K"] = IUPACOM["G"].merge(IUPACOM["T"])
|
78
|
-
IUPACOM["M"] = IUPACOM["A"].merge(IUPACOM["C"])
|
79
|
-
IUPACOM["S"] = IUPACOM["G"].merge(IUPACOM["C"])
|
80
|
-
IUPACOM["W"] = IUPACOM["A"].merge(IUPACOM["T"])
|
81
|
-
IUPACOM["B"] = IUPACOM["G"].merge(IUPACOM["T"].merge(IUPACOM["C"]))
|
82
|
-
IUPACOM["D"] = IUPACOM["G"].merge(IUPACOM["A"].merge(IUPACOM["T"]))
|
83
|
-
IUPACOM["H"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["T"]))
|
84
|
-
IUPACOM["V"] = IUPACOM["G"].merge(IUPACOM["C"].merge(IUPACOM["A"]))
|
85
|
-
IUPACOM["N"] = IUPACOM["A"].merge(IUPACOM["C"].merge(IUPACOM["G"].merge(IUPACOM["T"])))
|
86
|
-
|
87
|
-
# IUPACMERGE = CODE.merge({
|
88
|
-
# "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
|
89
|
-
#
|
90
|
-
# })
|
91
|
-
|
92
|
-
end
|
1
|
+
# A String of IUPAC nucleotide codes with degenerate-aware comparison:
# two codes "match" when the nucleotide sets they stand for overlap.
#
# NOTE: == is overridden with matching semantics (it is neither strict
# equality nor transitive); eql? and hash are left as String defines them.
class IUPAC < String
  # Sorted nucleotide set => IUPAC code.
  CODE = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}
  # IUPAC code => sorted nucleotide set.
  REVCODE = CODE.invert

  # Match table: IUPACOM[x][y] is :llib when the nucleotide sets of codes x
  # and y share at least one letter, and absent (nil) otherwise.
  IUPACOM = REVCODE.keys.inject({}) { |table, code|
    letters = REVCODE[code].split(//)
    table[code] = REVCODE.keys.inject({}) { |row, other|
      row[other] = :llib unless (letters & REVCODE[other].split(//)).empty?
      row
    }
    table
  }

  # Returns a copy that is again an IUPAC instance.
  def dup
    IUPAC.new(self)
  end

  # words:: one of
  #   * Array of equally long words - each column is collapsed into the
  #     code for the set of letters seen in it; checkerr is called with a
  #     true block when a column's letter set has no code;
  #   * IUPAC - copied as is;
  #   * String - checkerr is called with a true block when the word has
  #     characters outside ACGTURYKMSWBDHVN.
  # Any other argument leaves the new object empty.
  # NOTE(review): "U" passes the String check but has no entry in
  # CODE/REVCODE, so merge does not support it - confirm whether it should
  # be accepted.
  def initialize(words)
    if words.is_a?(Array)
      iupac = (0...words[0].size).collect { |i|
        cola = words.collect { |word| word[i,1] }.uniq.sort.join
        checkerr("bad letter set #{cola}") { !CODE.has_key?(cola) }
        CODE[cola]
      }.join
      super(iupac)
    elsif words.is_a?(IUPAC)
      super(words)
    elsif words.is_a?(String)
      checkerr("word #{words} has strange characters") { words.tr('ACGTURYKMSWBDHVN', '').size > 0 }
      super(words)
    end
  end

  # True when +iupac+ has the same length and every position matches
  # according to IUPACOM. Objects without a size (e.g. nil) and letters
  # missing from the table never match; they yield false instead of raising.
  def ==(iupac)
    return false unless iupac.respond_to?(:size)
    return false if self.size != iupac.size
    (0...self.size).all? { |i|
      row = IUPACOM[self[i,1]]
      row && row[iupac[i,1]]
    }
  end

  # Position-wise union of two equally long words: each position becomes
  # the code for the combined nucleotide sets. Returns nil when the lengths
  # differ.
  def merge(iupac)
    return nil if self.size != iupac.size
    merged = (0...self.size).collect { |i|
      letters = REVCODE[self[i,1]].split(//).concat(REVCODE[iupac[i,1]].split(//)).uniq.sort.join
      CODE[letters]
    }.join
    return IUPAC.new(merged)
  end

  # Returns the first index at which +iupac+ matches (see ==) a substring
  # of self, or false when there is none or +iupac+ is not an IUPAC.
  def include?(iupac)
    # Type check first so that non-IUPAC arguments never get .size called.
    return false unless iupac.is_a?(IUPAC)
    return false if self.size < iupac.size
    (0..self.size-iupac.size).find { |i| IUPAC.new(self[i,iupac.size]) == iupac } || false
  end

  # Returns the complement (not reversed) as a new string.
  def compl
    return self.tr("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
  end

  # Complements self in place and returns self.
  def compl!
    self.tr!("ACGTRYKMSWBDHVN", "TGCAYRMKSWVHDBN")
    return self
  end

  # String#reverse stays reachable as reverse_string; reverse itself
  # returns an IUPAC.
  alias reverse_string reverse
  def reverse
    return IUPAC.new(reverse_string)
  end

  alias comp! compl!
  alias complement! compl!
  alias comp compl
  alias complement compl

  # IUPACMERGE = CODE.merge({
  #   "AA" => "A", "CC" => "C", "GG" => "G", "TT" => "T",
  #
  # })

end
|