sequence_logo 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +0,0 @@
1
- require_relative 'ytilib/ytilib'
2
- require_relative 'ytilib/addon'
3
- require_relative 'ytilib/iupac'
4
- require_relative 'ytilib/pm'
5
- require_relative 'ytilib/pmsd'
6
- require_relative 'ytilib/randoom'
7
- require_relative 'ytilib/bismark'
8
- require_relative 'ytilib/hack1'
9
- require_relative 'ytilib/infocod'
10
- require_relative 'ytilib/ppm_support'
@@ -1,247 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
# Returns the extension of +what+: the text after the last "." of its
# basename, without the dot itself.
#
# The dot is searched in the basename only, so a dot that appears in a
# directory component ("dir.d/file") is not mistaken for an extension
# separator (the previous implementation raised NoMethodError on such paths).
#
# @param what [String] a file name or path
# @return [String] the extension, or +what+ itself (unchanged) when the
#   basename contains no "."
def File.ext_wo_name(what)
  base = File.basename(what)
  dot = base.rindex(".")
  return what if dot.nil?

  base[(dot + 1)..-1]
end
8
-
9
# Returns the basename of +what+ with its last extension removed.
#
# The dot is searched in the basename only, so a dot that appears in a
# directory component ("dir.d/file") is not mistaken for an extension
# separator (the previous implementation raised NoMethodError on such paths).
#
# @param what [String] a file name or path
# @return [String] the basename up to (not including) its last ".", or
#   +what+ itself (unchanged) when the basename contains no "."
def File.name_wo_ext(what)
  base = File.basename(what)
  dot = base.rindex(".")
  return what if dot.nil?

  base[0...dot]
end
14
-
15
class Float
  # Rounds the number to +x+ decimal places (half away from zero, as
  # Float#round does) and returns the result as a Float.
  def round_to(x)
    scale = 10**x
    scaled = self * scale
    scaled.round.to_f / scale
  end

  # Truncates the number towards zero, keeping +x+ decimal places, and
  # returns the result as a Float.
  def cut_to(x)
    scale = 10**x
    magnitude = (abs * scale).floor.to_f
    # -1, 0 or 1, so that the truncation is applied to the absolute value.
    sign = if self == 0.0
             0
           else
             (self / abs).round
           end
    magnitude * sign / scale
  end
end
24
-
25
# Helpers added to Array. NOTE: +shuffle+, +shuffle!+ and +sum+ replace the
# methods of the same name that modern Ruby already defines on Array, so they
# accept the same optional arguments as the core versions to avoid breaking
# unrelated callers.
class Array
  # Returns a shuffled copy of the array (Fisher-Yates); the receiver is left
  # untouched.
  #
  # @param random [#rand] random number source; defaults to the global generator
  # @return [Array] a new, shuffled array
  def shuffle(random: Random)
    dup.shuffle!(random: random)
  end

  # Shuffles the array in place (Fisher-Yates).
  #
  # @param random [#rand] random number source; defaults to the global generator
  # @return [Array] self
  def shuffle!(random: Random)
    (size - 1).downto(1) do |i|
      j = random.rand(i + 1)
      self[i], self[j] = self[j], self[i]
    end
    self
  end

  # Arithmetic mean of the elements.
  #
  # The division is performed with the elements' own arithmetic, so an array
  # of Integers yields an Integer (truncated) mean.
  #
  # @return [Numeric, nil] the mean, or nil for an empty array
  def average
    return nil if empty?

    inject(0) { |acc, s| acc + s } / size
  end
  alias mean average

  # Population variance, computed as mean(x^2) - mean(x)^2.
  #
  # @return [Numeric, nil] the variance, or nil for an empty array
  #   (previously an empty array raised NoMethodError)
  def variance
    return nil if empty?

    collect { |s| s * s }.average - average**2
  end

  # Sum of the elements.
  #
  # Without +init+ the accumulation starts from the first element, so the
  # elements only need to respond to +; an empty array yields 0 (previously it
  # raised NoMethodError). With +init+ and/or a block the behaviour follows the
  # core Array#sum contract.
  #
  # @param init [Object, nil] optional initial value of the accumulator
  # @yield [element] optional mapping applied to each element before adding
  # @return [Object] the sum
  def sum(init = nil)
    values = block_given? ? map { |s| yield s } : self
    return values.inject(init) { |acc, s| acc + s } unless init.nil?
    return 0 if values.empty?

    values.drop(1).inject(values.first) { |acc, s| acc + s }
  end
end
57
-
58
class String
  # Replaces every nucleotide letter (a, c, g, t in either case) with its
  # complement, modifying the receiver. Always returns self, even when the
  # string contained nothing to replace.
  def compl!
    tr!("acgtACGT", "tgcaTGCA")
    self
  end

  # Returns a copy in which every nucleotide letter (a, c, g, t in either
  # case) is replaced with its complement.
  def compl
    tr("acgtACGT", "tgcaTGCA")
  end

  alias comp! compl!
  alias complement! compl!
  alias comp compl
  alias complement compl

  # Returns the reverse complement as a new string.
  def revcomp
    complemented = comp
    complemented.reverse
  end

  # Turns the receiver into its reverse complement in place; returns self.
  def revcomp!
    comp!
    reverse!
  end

  # Returns an upper-cased identifier in which every character other than a
  # word character or "." is replaced with "_".
  def to_id
    sanitized = gsub(/[^.\w]/, '_')
    sanitized.upcase
  end
end
87
-
88
- # This can also be done in a more sophisticated way
89
- =begin
90
- String.class_eval do
91
- def to_id
92
- return self.gsub(/[^.\w]/, '_')
93
- end
94
- end
95
- =end
96
-
97
class String
  # The opposite of String#next / String#succ. It is impossible to be a
  # *complete* opposite because both "9".next = "10" and "09".next = "10";
  # if going backwards from "10" there's no way to know whether the result
  # should be "09" or "9". Where the first ranged character is about to
  # underflow and the next character is within the same range the result
  # is shrunk down - that is, "10" goes to "9", "aa" goes to "z"; any non-
  # range prefix or suffix is OK, e.g. "+!$%10-=+" goes to "+!$%9-=+".
  # Items in the middle of a string don't do this - e.g. "12.10" goes to
  # "12.09", to match how "next" would work as best as possible.
  #
  # The standard "next" function works on strings that contain *no*
  # alphanumeric characters, using character codes. This implementation
  # of "prev" does *not* work on such strings - while strings may contain
  # any characters you like, only the ASCII alphanumeric components
  # (0-9, a-z, A-Z) are operated upon.
  #
  # Should total underflow result, nil will be returned - e.g. "00".prev
  # returns nil, as does "a".prev. This is done even if there are other
  # characters in the string that were not touched - e.g. "+0.0".prev
  # also returns nil. Broadly speaking, a nil return value is used for
  # any attempt to find the previous value of a string that could not have
  # been generated using "next" in the first place.
  #
  # As with "next" sometimes the result of "prev" can be a little obscure
  # so it is often best to try out things using "irb" if unsure. Note in
  # particular that software revision numbers do not necessarily behave
  # predictably, because they don't with "next"! E.g. "12.4.9" might go to
  # "12.4.10" for a revision number, but "12.4.9".next = "12.5.0". Thus
  # "12.5.0".prev = "12.4.9" and "12.4.10".prev = "12.4.09" (because the
  # only way to make "12.4.10" using "next" is to start at "12.4.09").
  #
  # Since 'succ' (successor) is an alias for 'next', so 'pred'
  # (predecessor) is an alias for 'prev'.
  #
  # Implementation note: the original code relied on Ruby 1.8 semantics, where
  # indexing a String yields an Integer byte. On Ruby >= 1.9 indexing yields a
  # one-character String, so the arithmetic is done on code points here.
  #
  # @param collapse [Boolean] accepted for interface compatibility; it is not
  #   used by the implementation
  # @return [String, nil] a new string holding the previous value, or nil on
  #   underflow; a string without any alphanumeric character is returned as an
  #   unchanged copy
  def prev(collapse = false)
    zero = '0'.ord
    ranges = [(zero..'9'.ord), ('a'.ord..'z'.ord), ('A'.ord..'Z'.ord)]

    # Range (digits, lower case or upper case) that +char+ belongs to, or nil.
    range_of = lambda do |char|
      next nil unless char.ascii_only?

      code = char.ord
      ranges.find { |range| range.cover?(code) }
    end

    str = dup

    # Search forward for the first in-range character. If it is "1", "a" or
    # "A", record its index: should the underflow carry reach the character
    # right after it, this first character must be deleted
    # ("aa..." -> "z...", "10..." -> "9...").
    first_ranged = nil
    first_within = nil
    str.each_char.with_index do |char, position|
      within = range_of.call(char)
      next if within.nil?

      match_code = within.first == zero ? '1'.ord : within.first
      first_ranged = position if char.ord == match_code
      first_within = within
      break
    end

    any_done = false
    early_exit = false

    # Process the string from its last character backwards, carrying the
    # decrement to earlier characters for as long as a character underflows.
    1.upto(str.length) do |index|
      char = str[-index]
      within = range_of.call(char)

      # Characters outside the known ranges are left alone.
      next if within.nil?

      any_done = true
      code = char.ord - 1

      if within.cover?(code)
        # Still within range: store the decremented character and finish.
        str[-index] = code.chr
        early_exit = true
        break
      end

      # Underflow: wrap this character around to the top of its range ...
      str[-index] = within.last.chr

      # ... and, if it sits immediately after the leading "1"/"a"/"A" found in
      # the first pass and belongs to the same range, drop that leading
      # character instead of carrying into it (so "10" -> "9", not "09").
      if first_ranged && first_within.cover?(code + 1) &&
         (first_ranged - str.length) == -(index + 1)
        str.slice!(-(index + 1))
        early_exit = true
        break
      end
    end

    # At least one character was processed but the loop never finished early:
    # the decrement was carried off the front of the string, i.e. underflow.
    any_done && !early_exit ? nil : str
  end

  # As String#prev, but modifies the string in place rather than returning a
  # copy. If underflow occurs, the string is left unchanged.
  #
  # @return [String] self
  def prev!
    new_str = prev
    replace(new_str) unless new_str.nil?
    self
  end

  alias pred prev
  alias pred! prev!
end
@@ -1,71 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- module Ytilib
4
- require "rexml/document"
5
- include REXML
6
-
7
- class Bismark < Document
8
-
9
- def initialize(source = nil, add_dtd = false)
10
- dtd = add_dtd ? "<!DOCTYPE smallbismark SYSTEM 'smallbismark.dtd'>#{$/}" : ""
11
- source == nil ? super("<?xml version='1.0' encoding='UTF-8'?>#{$/}#{dtd}") : super(source)
12
- super(IO.read(source)) if source != nil && root == nil
13
- if source == nil
14
- self.add_element("smallbismark")
15
- # xmlns breaks XPath for a REXML library under Linux, strange, indeed
16
- # self.add_element("smallbismark", {"xmlns" => "http://bioinform.imb.ac.ru/smallBiSMark/smallbismark.dtd"})
17
- self.root.add_element("comment", {"name" => "WARNING"}).add_text("This is a draft version of small-BiSMark. Specification is the subject to change!")
18
- end
19
- end
20
-
21
- def getXML
22
- beautify
23
- s = ""; write(s, 1, true)
24
- s.rstrip!
25
- return s
26
- end
27
- alias get_xml getXML
28
-
29
- def get_pm(xpath)
30
- pwmnode = self.elements[xpath]
31
- pm = PM.new_pm(pwmnode.attribute("length").value.to_i)
32
- toi = pwmnode.name == "PCM"
33
- pwmnode.elements.each("pm-column") { |c|
34
- position = c.attribute("position").value.to_i - 1
35
- weights = [c.elements["a"].get_text.value.strip.to_f,
36
- c.elements["c"].get_text.value.strip.to_f,
37
- c.elements["g"].get_text.value.strip.to_f,
38
- c.elements["t"].get_text.value.strip.to_f]
39
- weights.collect { |w| w.to_i } if toi
40
- pm['A'][position], pm['C'][position], pm['G'][position], pm['T'][position] = weights[0], weights[1], weights[2], weights[3]
41
- }
42
- return pm
43
- end
44
-
45
- private
46
- CONTAIN_NO_TEXT = {
47
- "segment" => :vasya_shmyak,
48
- "group" => :vasya_shmyak,
49
- "smallbismark" => :vasya_shmyak,
50
- "motif" => :vasya_shmyak,
51
- "PWM" => :vasya_shmyak,
52
- "PCM" => :vasya_shmyak,
53
- "PPM" => :vasya_shmyak,
54
- "source" => :vasya_shmyak,
55
- "factor" => :vasya_shmyak,
56
- "pm-column" => :vasya_shmyak,
57
- "word-list" => :vasya_shmyak}
58
-
59
- def beautify(node = self)
60
- if node == self
61
- self.delete_if { |e| e.is_a?(Text) }
62
- self.each { |e| beautify(e) }
63
- else
64
- node.delete_if { |e| e.is_a?(Text) } if node.respond_to?(:delete_if) && Bismark::CONTAIN_NO_TEXT.has_key?(node.name)
65
- node.each { |e| beautify(e) } if node.respond_to?(:each)
66
- end
67
- end
68
-
69
- end
70
-
71
- end
@@ -1,75 +0,0 @@
1
- require 'rexml/formatters/pretty'
2
-
3
- module REXML
4
- module Formatters
5
- # The Transitive formatter writes an XML document that parses to an
6
- # identical document as the source document. This means that no extra
7
- # whitespace nodes are inserted, and whitespace within text nodes is
8
- # preserved. Within these constraints, the document is pretty-printed,
9
- # with whitespace inserted into the metadata to introduce formatting.
10
- #
11
- # Note that this is only useful if the original XML is not already
12
- # formatted. Since this formatter does not alter whitespace nodes, the
13
- # results of formatting already formatted XML will be odd.
14
- class Transitive < Default
15
- def initialize( indentation=2 )
16
- @indentation = indentation
17
- @level = 0
18
- end
19
-
20
- protected
21
- def write_element( node, output )
22
- output << "\n" << ' '*@level
23
- output << "<#{node.expanded_name}"
24
-
25
- node.attributes.each_attribute do |attr|
26
- output << " "
27
- attr.write( output )
28
- end unless node.attributes.empty?
29
-
30
- if node.children.empty?
31
- output << "/>"
32
- else
33
- output << ">"
34
- # If compact and all children are text, and if the formatted output
35
- # is less than the specified width, then try to print everything on
36
- # one line
37
- skip = false
38
- @level += @indentation
39
-
40
- only_text = true
41
-
42
- node.children.each { |child|
43
- only_text = child.is_a?(REXML::Text) && only_text
44
- write( child, output )
45
- }
46
- @level -= @indentation
47
- output << "#{only_text ? "" : "\n" + ' '*@level}" << "</#{node.expanded_name}>"
48
- end
49
-
50
- end
51
-
52
- def write_text( node, output )
53
- output << node.to_s()
54
- end
55
- end
56
- end
57
-
58
- class Document
59
- def write( output=$stdout, indent=-1, trans=false, ie_hack=false )
60
- if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
61
- output = Output.new( output, xml_decl.encoding )
62
- end
63
- formatter = if indent > -1
64
- if trans
65
- REXML::Formatters::Transitive.new( indent )
66
- else
67
- REXML::Formatters::Pretty.new( indent, ie_hack )
68
- end
69
- else
70
- REXML::Formatters::Default.new( ie_hack )
71
- end
72
- formatter.write( self, output )
73
- end
74
- end
75
- end
@@ -1,108 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
class Float
  # Approximates the natural logarithm of the factorial of this number.
  # Uses the Stieltjes continued-fraction formula from
  # http://www.luschny.de/math/factorial/approx/SimpleCases.html
  #
  # Returns 0.0 for any value less than or equal to 1.
  def log_fact
    return 0.0 if self <= 1

    # Continued-fraction coefficients a0..a6, outermost first.
    coefficients = [
      1.0/12,
      1.0/30,
      53.0/210,
      195.0/371,
      22999.0/22737,
      29944523.0/19733142,
      109535241009.0/48264275462
    ]
    z = self + 1

    # Evaluate a0/(z + a1/(z + ... a6/z)) from the innermost term outwards.
    fraction = coefficients.reverse.inject(0.0) { |tail, a| a / (z + tail) }

    (1.0/2) * Math.log(2 * Math::PI) + (z - 1.0/2) * Math.log(z) - z + fraction
  end
end
18
-
19
class Integer
  # Logarithm of the factorial of this integer: converts the receiver to a
  # Float and delegates to Float#log_fact.
  def log_fact
    as_float = to_f
    as_float.log_fact
  end
end
24
-
25
- # Naive version
26
- =begin
27
- class Integer
28
- @@fact_hash = {}
29
- def log_fact
30
- return 0.0 if self == 0
31
- return nil if self < 0
32
- if self <= 170
33
- @@fact_hash[self] = Math.log( lambda { |k| return k if self.times { |i| k *= i.next } }.call(1) )
34
- else
35
- return self.to_f.log_fact
36
- end unless @@fact_hash.has_key?(self)
37
- return @@fact_hash[self]
38
- end
39
- end
40
- =end
41
-
42
- module Ytilib
43
# Information-content ("infocod") helpers for a position matrix.
#
# All values share one formula, applied to four per-letter counts k:
#   (sum of k.log_fact - @words_count.log_fact) / @words_count
class PM
  # Information content of one column, or of every column.
  #
  # @param position [Integer, nil] 0-based column index; nil selects all columns
  # @return [Float, Array<Float>] the value for +position+, or one value per
  #   column (0...@size) when +position+ is nil
  def infocod(position = nil)
    return infocod_private(position) if position

    (0...@size).collect { |i| infocod_private(i) }
  end
  alias icd infocod

  # Value for a column whose words are split evenly between two letters.
  #
  # @param floor [Boolean] round the per-letter count down to an integer
  def icd2of4(floor = false)
    i2o4 = @words_count / 2.0
    i2o4 = i2o4.floor if floor
    # The last 0 stands in for @words_count % 2, because 0! = 1!
    icd_from_counts([i2o4, i2o4, 0, 0])
  end

  # Value for a column whose words are split evenly between three letters.
  #
  # @param floor [Boolean] round the per-letter count down and give the
  #   remainder (@words_count % 3) to the fourth letter
  def icd3of4(floor = false)
    i3o4 = @words_count / 3.0
    i3o4 = i3o4.floor if floor
    addon = floor ? @words_count % 3 : 0
    icd_from_counts([i3o4, i3o4, i3o4, addon])
  end

  # Same value as #icd3of4 without flooring.
  def icdThc
    icd3of4
  end

  # Value for a column with counts in the proportion 2 : 2 : 1 : 1.
  def icdTlc
    io = @words_count / 6.0
    icd_from_counts([2*io, 2*io, io, io])
  end

  # Value for a column whose words are split evenly between all four letters.
  #
  # @param floor [Boolean] round the per-letter count down to an integer
  def icd4of4(floor = false)
    i4o4 = @words_count / 4.0
    i4o4 = i4o4.floor if floor
    icd_from_counts([i4o4, i4o4, i4o4, i4o4])
  end

  protected

  # Information content of the column at +position+, read from @matrix.
  def infocod_private(position)
    counts = ['A','C','G','T'].collect { |letter| @matrix[letter][position] }
    icd_from_counts(counts)
  end

  private

  # Applies the shared formula to the given per-letter counts.
  def icd_from_counts(counts)
    ( counts.inject(0.0) { |sum, k| sum + k.log_fact } - @words_count.log_fact ) / @words_count
  end
end
85
-
86
# Conversion of a probability matrix to counts, plus information content.
class PPM
  # Converts the probabilities to counts by multiplying every cell by the
  # words count.
  #
  # @param words_count [Numeric, nil] when given, it is stored in @words_count
  #   (replacing any previous value) before the conversion
  # @return [PM] a matrix built with PM.new(size, counts)
  # Raises through +checkerr+ when no words count is available.
  def to_pcm(words_count = nil)
    @words_count = words_count if words_count
    checkerr("words count is not specified") { !@words_count }
    counts = PM.new_matrix(@size)
    (0...size).each { |i|
      ['A', 'C', 'G', 'T'].each { |l|
        counts[l][i] = @matrix[l][i] * @words_count
      }
    }
    PM.new(size, counts)
  end
  # `alias new_name old_name`: the original line had the two names reversed,
  # which replaced the to_pcm defined above with get_pcm (or raised NameError
  # when get_pcm was not defined yet).
  # NOTE(review): if another file defines its own PPM#get_pcm, confirm which
  # implementation is meant to win.
  alias get_pcm to_pcm

  # Information content of the count matrix derived from this matrix.
  #
  # @param position [Integer, nil] column index passed on to PM#infocod
  def infocod(position = nil)
    to_pcm.infocod(position)
  end
  alias icd infocod
end
108
- end