sequence_logo 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +0,0 @@
1
- require_relative 'ytilib/ytilib'
2
- require_relative 'ytilib/addon'
3
- require_relative 'ytilib/iupac'
4
- require_relative 'ytilib/pm'
5
- require_relative 'ytilib/pmsd'
6
- require_relative 'ytilib/randoom'
7
- require_relative 'ytilib/bismark'
8
- require_relative 'ytilib/hack1'
9
- require_relative 'ytilib/infocod'
10
- require_relative 'ytilib/ppm_support'
@@ -1,247 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- def File.ext_wo_name(what)
4
- return what if what.rindex(".") == nil
5
- what = File.basename(what)
6
- "#{what}"[what.rindex(".")+1..-1]
7
- end
8
-
9
- def File.name_wo_ext(what)
10
- return what if what.rindex(".") == nil
11
- what = File.basename(what)
12
- "#{what}"[0...what.rindex(".")]
13
- end
14
-
15
- class Float
16
- def round_to(x)
17
- (self * 10**x).round.to_f / 10**x
18
- end
19
-
20
- def cut_to(x)
21
- (self.abs * 10**x).floor.to_f * (self == 0.0 ? 0 : self/self.abs).round / 10**x
22
- end
23
- end
24
-
25
- class Array
26
- def shuffle
27
- arr = self.dup
28
- arr.size.downto 2 do |j|
29
- r = rand j
30
- arr[j-1], arr[r] = arr[r], arr[j-1]
31
- end
32
- arr
33
- end
34
-
35
- def shuffle!
36
- (size - 1).downto 1 do |i|
37
- j = rand(i + 1)
38
- self[i], self[j] = self[j], self[i]
39
- end
40
- self
41
- end
42
-
43
- def average
44
- self.empty? ? nil : self.inject(0) { |sum,s| sum += s } / self.size
45
- end
46
- alias mean average
47
-
48
- def variance
49
- return self.collect { |s| s*s }.average - average**2
50
- end
51
-
52
- def sum
53
- self.inject(self[0]) { |sum,s| sum += s} - self[0]
54
- end
55
-
56
- end
57
-
58
- class String
59
-
60
- def compl!
61
- self.tr!("acgtACGT", "tgcaTGCA")
62
- return self
63
- end
64
-
65
- def compl
66
- return self.tr("acgtACGT", "tgcaTGCA")
67
- end
68
-
69
- alias comp! compl!
70
- alias complement! compl!
71
- alias comp compl
72
- alias complement compl
73
-
74
- def revcomp
75
- return comp.reverse
76
- end
77
-
78
- def revcomp!
79
- return comp!.reverse!
80
- end
81
-
82
- def to_id
83
- return self.gsub(/[^.\w]/, '_').upcase
84
- end
85
-
86
- end
87
-
88
- # Also, this can be done in a more sophisticated way
89
- =begin
90
- String.class_eval do
91
- def to_id
92
- return self.gsub(/[^.\w]/, '_')
93
- end
94
- end
95
- =end
96
-
97
- class String
98
- # The opposite of String::next / String::succ. It is impossible to be a
99
- # *complete* opposite because both "9".next = "10" and "09".next = "10";
100
- # if going backwards from "10" there's no way to know whether the result
101
- # should be "09" or "9". Where the first ranged character is about to
102
- # underflow and the next character is within the same range the result
103
- # is shrunk down - that is, "10" goes to "9", "aa" goes to "z"; any non-
104
- # range prefix or suffix is OK, e.g. "+!$%10-=+" goes to "+!$%9-=+".
105
- # Items in the middle of a string don't do this - e.g. "12.10" goes to
106
- # "12.09", to match how "next" would work as best as possible.
107
- #
108
- # The standard "next" function works on strings that contain *no*
109
- # alphanumeric characters, using character codes. This implementation
110
- # of "prev" does *not* work on such strings - while strings may contain
111
- # any characters you like, only the alphanumeric components are operated
112
- # upon.
113
- #
114
- # Should total underflow result, "nil" will be returned - e.g. "00".prev
115
- # returns 'nil', as does "a".prev. This is done even if there are other
116
- # characters in the string that were not touched - e.g. "+0.0".prev
117
- # also returns "nil". Broadly speaking, a "nil" return value is used for
118
- # any attempt to find the previous value of a string that could not have
119
- # been generated using "next" in the first place.
120
- #
121
- # As with "next" sometimes the result of "prev" can be a little obscure
122
- # so it is often best to try out things using "irb" if unsure. Note in
123
- # particular that software revision numbers do not necessarily behave
124
- # predictably, because they don't with "next"! E.g. "12.4.9" might go to
125
- # "12.4.10" for a revision number, but "12.4.9".next = "12.5.0". Thus
126
- # "12.5.0".prev = "12.4.9" and "12.4.10".prev = "12.4.09" (because the
127
- # only way to make "12.4.10" using "next" is to start at "12.4.09").
128
- #
129
- # Just as 'succ' (successor) is an alias for 'next', 'pred'
130
- # (predecessor) is an alias for 'prev'.
131
- #
132
- def prev(collapse = false)
133
- str = self.dup
134
- early_exit = false
135
- any_done = false
136
- ranges = [
137
- ('0'[0]..'9'[0]),
138
- ('a'[0]..'z'[0]),
139
- ('A'[0]..'Z'[0]),
140
- nil
141
- ]
142
-
143
- # Search forward for the first in-range character. If found check
144
- # to see if that character is "1", "a" or "A". If it is, record
145
- # its index (from 0 to string length - 1). We'll need this if
146
- # underflows wrap as far as the found byte because in that case
147
- # this first found byte should be deleted ("aa..." -> "z...",
148
- # "10..." -> "9...").
149
-
150
- first_ranged = nil
151
-
152
- for index in (1..str.length)
153
- byte = str[index - 1]
154
-
155
- # Determine whether or not the current byte is a number, lower case
156
- # or upper case letter. We expect 'select' to only find one matching
157
- # array entry in 'ranges', thus we dereference index 0 after the
158
- # 'end' to put a matching range from within 'ranges' into 'within',
159
- # or 'nil' for any unmatched byte.
160
-
161
- within = ranges.select do |range|
162
- range.nil? or range.include?(byte)
163
- end [0]
164
-
165
- unless within.nil?
166
- case within.first
167
- when '0'[0]
168
- match_byte = '1'[0]
169
- else
170
- match_byte = within.first
171
- end
172
-
173
- first_ranged = index - 1 if (byte == match_byte)
174
- first_within = within
175
- break
176
- end
177
- end
178
-
179
- for index in (1..str.length)
180
-
181
- # Process the input string in reverse character order - fetch the
182
- # bytes via negative index.
183
-
184
- byte = str[-index]
185
-
186
- within = ranges.select do |range|
187
- range.nil? or range.include?(byte)
188
- end [0]
189
-
190
- # Skip this letter unless within a known range. Otherwise note that
191
- # at least one byte was able to be processed.
192
-
193
- next if within.nil?
194
- any_done = true
195
-
196
- # Decrement the current byte. If it is still within its range, set
197
- # the byte and bail out - we're finished. Flag the early exit. If
198
- # the byte is no longer within range, wrap the character around
199
- # and continue the loop to carry the decrement to an earlier byte.
200
-
201
- byte = byte - 1
202
-
203
- if (within.include? byte)
204
- str[-index] = byte
205
- early_exit = true
206
- break
207
- else
208
- str[-index] = within.last
209
-
210
- # If we've just wrapped around a character immediately after the
211
- # one found right at the start ('1', 'a' or 'A') then this first
212
- # ranged character should be deleted (so "10" -> "9").
213
-
214
- if (first_ranged != nil and first_within.include?(byte + 1) and (first_ranged - str.length) == -(index + 1))
215
- str.slice!(-(index + 1))
216
- early_exit = true
217
- break
218
- end
219
- end
220
-
221
- end # From outer 'for' loop
222
-
223
- # If we did process at least one byte but we did not exit early, then
224
- # the loop completed due to carrying a decrement to other bytes. This
225
- # means an underflow condition - return 'nil'.
226
-
227
- if (any_done == true and early_exit == false)
228
- return nil
229
- else
230
- return str
231
- end
232
- end
233
-
234
- # As (extended) String::pred / String::prev, but modifies the string in
235
- # place rather than returning a copy. If underflow occurs, the string
236
- # will be unchanged. Returns 'self'.
237
- #
238
- def prev!
239
- new_str = prev
240
- self.replace(new_str) unless new_str.nil?
241
- return self
242
- end
243
-
244
- alias pred prev
245
- alias pred! prev!
246
-
247
- end
@@ -1,71 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- module Ytilib
4
- require "rexml/document"
5
- include REXML
6
-
7
- class Bismark < Document
8
-
9
- def initialize(source = nil, add_dtd = false)
10
- dtd = add_dtd ? "<!DOCTYPE smallbismark SYSTEM 'smallbismark.dtd'>#{$/}" : ""
11
- source == nil ? super("<?xml version='1.0' encoding='UTF-8'?>#{$/}#{dtd}") : super(source)
12
- super(IO.read(source)) if source != nil && root == nil
13
- if source == nil
14
- self.add_element("smallbismark")
15
- # xmlns breaks XPath for a REXML library under Linux, strange, indeed
16
- # self.add_element("smallbismark", {"xmlns" => "http://bioinform.imb.ac.ru/smallBiSMark/smallbismark.dtd"})
17
- self.root.add_element("comment", {"name" => "WARNING"}).add_text("This is a draft version of small-BiSMark. Specification is the subject to change!")
18
- end
19
- end
20
-
21
- def getXML
22
- beautify
23
- s = ""; write(s, 1, true)
24
- s.rstrip!
25
- return s
26
- end
27
- alias get_xml getXML
28
-
29
- def get_pm(xpath)
30
- pwmnode = self.elements[xpath]
31
- pm = PM.new_pm(pwmnode.attribute("length").value.to_i)
32
- toi = pwmnode.name == "PCM"
33
- pwmnode.elements.each("pm-column") { |c|
34
- position = c.attribute("position").value.to_i - 1
35
- weights = [c.elements["a"].get_text.value.strip.to_f,
36
- c.elements["c"].get_text.value.strip.to_f,
37
- c.elements["g"].get_text.value.strip.to_f,
38
- c.elements["t"].get_text.value.strip.to_f]
39
- weights.collect { |w| w.to_i } if toi
40
- pm['A'][position], pm['C'][position], pm['G'][position], pm['T'][position] = weights[0], weights[1], weights[2], weights[3]
41
- }
42
- return pm
43
- end
44
-
45
- private
46
- CONTAIN_NO_TEXT = {
47
- "segment" => :vasya_shmyak,
48
- "group" => :vasya_shmyak,
49
- "smallbismark" => :vasya_shmyak,
50
- "motif" => :vasya_shmyak,
51
- "PWM" => :vasya_shmyak,
52
- "PCM" => :vasya_shmyak,
53
- "PPM" => :vasya_shmyak,
54
- "source" => :vasya_shmyak,
55
- "factor" => :vasya_shmyak,
56
- "pm-column" => :vasya_shmyak,
57
- "word-list" => :vasya_shmyak}
58
-
59
- def beautify(node = self)
60
- if node == self
61
- self.delete_if { |e| e.is_a?(Text) }
62
- self.each { |e| beautify(e) }
63
- else
64
- node.delete_if { |e| e.is_a?(Text) } if node.respond_to?(:delete_if) && Bismark::CONTAIN_NO_TEXT.has_key?(node.name)
65
- node.each { |e| beautify(e) } if node.respond_to?(:each)
66
- end
67
- end
68
-
69
- end
70
-
71
- end
@@ -1,75 +0,0 @@
1
- require 'rexml/formatters/pretty'
2
-
3
- module REXML
4
- module Formatters
5
- # The Transitive formatter writes an XML document that parses to an
6
- # identical document as the source document. This means that no extra
7
- # whitespace nodes are inserted, and whitespace within text nodes is
8
- # preserved. Within these constraints, the document is pretty-printed,
9
- # with whitespace inserted into the metadata to introduce formatting.
10
- #
11
- # Note that this is only useful if the original XML is not already
12
- # formatted. Since this formatter does not alter whitespace nodes, the
13
- # results of formatting already formatted XML will be odd.
14
- class Transitive < Default
15
- def initialize( indentation=2 )
16
- @indentation = indentation
17
- @level = 0
18
- end
19
-
20
- protected
21
- def write_element( node, output )
22
- output << "\n" << ' '*@level
23
- output << "<#{node.expanded_name}"
24
-
25
- node.attributes.each_attribute do |attr|
26
- output << " "
27
- attr.write( output )
28
- end unless node.attributes.empty?
29
-
30
- if node.children.empty?
31
- output << "/>"
32
- else
33
- output << ">"
34
- # If compact and all children are text, and if the formatted output
35
- # is less than the specified width, then try to print everything on
36
- # one line
37
- skip = false
38
- @level += @indentation
39
-
40
- only_text = true
41
-
42
- node.children.each { |child|
43
- only_text = child.is_a?(REXML::Text) && only_text
44
- write( child, output )
45
- }
46
- @level -= @indentation
47
- output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
48
- end
49
-
50
- end
51
-
52
- def write_text( node, output )
53
- output << node.to_s()
54
- end
55
- end
56
- end
57
-
58
- class Document
59
- def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
60
- if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
61
- output = Output.new( output, xml_decl.encoding )
62
- end
63
- formatter = if indent > -1
64
- if trans
65
- REXML::Formatters::Transitive.new( indent )
66
- else
67
- REXML::Formatters::Pretty.new( indent, ie_hack )
68
- end
69
- else
70
- REXML::Formatters::Default.new( ie_hack )
71
- end
72
- formatter.write( self, output )
73
- end
74
- end
75
- end
@@ -1,108 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- class Float
4
- # Using Stieltjes formula from http://www.luschny.de/math/factorial/approx/SimpleCases.html
5
- def log_fact
6
- return 0.0 if self <= 1
7
- a0 = 1.0/12
8
- a1 = 1.0/30
9
- a2 = 53.0/210
10
- a3 = 195.0/371
11
- a4 = 22999.0/22737
12
- a5 = 29944523.0/19733142
13
- a6 = 109535241009.0/48264275462
14
- z_big = self+1;
15
- (1.0/2)*Math.log(2*Math::PI)+(z_big-1.0/2)*Math.log(z_big)-z_big + a0/(z_big+a1/(z_big+a2/(z_big+a3/(z_big+a4/(z_big+a5/(z_big+a6/z_big))))))
16
- end
17
- end
18
-
19
- class Integer
20
- def log_fact
21
- self.to_f.log_fact
22
- end
23
- end
24
-
25
- # Naive version
26
- =begin
27
- class Integer
28
- @@fact_hash = {}
29
- def log_fact
30
- return 0.0 if self == 0
31
- return nil if self < 0
32
- if self <= 170
33
- @@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
34
- else
35
- return self.to_f.log_fact
36
- end unless @@fact_hash.has_key?(self)
37
- return @@fact_hash[self]
38
- end
39
- end
40
- =end
41
-
42
- module Ytilib
43
- class PM
44
- def infocod(position = nil)
45
- return infocod_private(position) if position
46
- (0...@size).collect { |i| infocod_private(i) }
47
- end
48
- alias icd infocod
49
-
50
- def icd2of4(floor = false)
51
- i2o4 = @words_count / 2.0
52
- i2o4 = i2o4.floor if floor
53
- ([i2o4, i2o4, 0, 0].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
54
- # 0 stands in for @words_count % 2 here, because 0! = 1! (log_fact is 0.0 for both)
55
- end
56
-
57
- def icd3of4(floor = false)
58
- i3o4 = @words_count / 3.0
59
- i3o4 = i3o4.floor if floor
60
- addon = floor ? @words_count % 3 : 0
61
- ([i3o4, i3o4, i3o4, addon].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
62
- end
63
-
64
- def icdThc
65
- icd3of4
66
- end
67
-
68
- def icdTlc
69
- io = @words_count / 6.0
70
- ([2*io, 2*io, io, io].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
71
- end
72
-
73
- def icd4of4(floor = false)
74
- i4o4 = @words_count / 4.0
75
- i4o4 = i4o4.floor if floor
76
- ([i4o4, i4o4, i4o4, i4o4].inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
77
- end
78
-
79
- protected
80
- def infocod_private(position)
81
- k_i = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
82
- ( k_i.inject(0.0) { |sum, k_i| sum += k_i.log_fact } - @words_count.log_fact ) / @words_count
83
- end
84
- end
85
-
86
- class PPM
87
- def to_pcm(words_count = nil)
88
- @words_count = words_count if words_count
89
- checkerr("words count is not specified") { !@words_count }
90
- counts = PM.new_matrix(@size)
91
- (0...size).each { |i|
92
- ['A', 'C', 'G', 'T'].each { |l|
93
- counts[l][i] = @matrix[l][i] * @words_count
94
- }
95
- }
96
- return PM.new(size, counts)
97
- end
98
- alias to_pcm get_pcm
99
-
100
- def infocod(position = nil)
101
- return to_pcm.infocod(position)
102
- end
103
-
104
- def icd(position = nil)
105
- return to_pcm.infocod(position)
106
- end
107
- end
108
- end