sequence_logo 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,247 +1,247 @@
1
- #!/usr/bin/ruby
2
-
3
- def File.ext_wo_name(what)
4
- return what if what.rindex(".") == nil
5
- what = File.basename(what)
6
- "#{what}"[what.rindex(".")+1..-1]
7
- end
8
-
9
- def File.name_wo_ext(what)
10
- return what if what.rindex(".") == nil
11
- what = File.basename(what)
12
- "#{what}"[0...what.rindex(".")]
13
- end
14
-
15
- class Float
16
- def round_to(x)
17
- (self * 10**x).round.to_f / 10**x
18
- end
19
-
20
- def cut_to(x)
21
- (self.abs * 10**x).floor.to_f * (self == 0.0 ? 0 : self/self.abs).round / 10**x
22
- end
23
- end
24
-
25
- class Array
26
- def shuffle
27
- arr = self.dup
28
- arr.size.downto 2 do |j|
29
- r = rand j
30
- arr[j-1], arr[r] = arr[r], arr[j-1]
31
- end
32
- arr
33
- end
34
-
35
- def shuffle!
36
- (size - 1).downto 1 do |i|
37
- j = rand(i + 1)
38
- self[i], self[j] = self[j], self[i]
39
- end
40
- self
41
- end
42
-
43
- def average
44
- self.empty? ? nil : self.inject(0) { |sum,s| sum += s } / self.size
45
- end
46
- alias mean average
47
-
48
- def variance
49
- return self.collect { |s| s*s }.average - average**2
50
- end
51
-
52
- def sum
53
- self.inject(self[0]) { |sum,s| sum += s} - self[0]
54
- end
55
-
56
- end
57
-
58
- class String
59
-
60
- def compl!
61
- self.tr!("acgtACGT", "tgcaTGCA")
62
- return self
63
- end
64
-
65
- def compl
66
- return self.tr("acgtACGT", "tgcaTGCA")
67
- end
68
-
69
- alias comp! compl!
70
- alias complement! compl!
71
- alias comp compl
72
- alias complement compl
73
-
74
- def revcomp
75
- return comp.reverse
76
- end
77
-
78
- def revcomp!
79
- return comp!.reverse!
80
- end
81
-
82
- def to_id
83
- return self.gsub(/[^.\w]/, '_').upcase
84
- end
85
-
86
- end
87
-
88
- # Also this can be done is a more sophisticated way
89
- =begin
90
- String.class_eval do
91
- def to_id
92
- return self.gsub(/[^.\w]/, '_')
93
- end
94
- end
95
- =end
96
-
97
- class String
98
- # The opposite of String::next / String::succ. It is impossible to be a
99
- # *complete* opposite because both "9".next = "10" and "09".next = "10";
100
- # if going backwards from "10" there's no way to know whether the result
101
- # should be "09" or "9". Where the first ranged character is about to
102
- # underflow and the next character is within the same range the result
103
- # is shrunk down - that is, "10" goes to "9", "aa" goes to "z"; any non-
104
- # range prefix or suffix is OK, e.g. "+!$%10-=+" goes to "+!$%9-=+".
105
- # Items in the middle of a string don't do this - e.g. "12.10" goes to
106
- # "12.09", to match how "next" would work as best as possible.
107
- #
108
- # The standard "next" function works on strings that contain *no*
109
- # alphanumeric characters, using character codes. This implementation
110
- # of "prev" does *not* work on such strings - while strings may contain
111
- # any characters you like, only the alphanumeric components are operated
112
- # upon.
113
- #
114
- # Should total underflow result, "nil" will be returned - e.g. "00".prev
115
- # returns 'nil', as does "a".prev. This is done even if there are other
116
- # characters in the string that were not touched - e.g. "+0.0".prev
117
- # also returns "nil". Broadly speaking, a "nil" return value is used for
118
- # any attempt to find the previous value of a string that could not have
119
- # been generated using "next" in the first place.
120
- #
121
- # As with "next" sometimes the result of "prev" can be a little obscure
122
- # so it is often best to try out things using "irb" if unsure. Note in
123
- # particular that software revision numbers do not necessarily behave
124
- # predictably, because they don't with "next"! E.g. "12.4.9" might go to
125
- # "12.4.10" for a revision number, but "12.4.9".next = "12.5.0". Thus
126
- # "12.5.0".prev = "12.4.9" and "12.4.10".prev = "12.4.09" (because the
127
- # only way to make "12.4.10" using "next" is to start at "12.4.09").
128
- #
129
- # Since 'succ' (successor) is an alias for 'next', so 'pred'
130
- # (predecessor) is an alias for 'prev'.
131
- #
132
- def prev(collapse = false)
133
- str = self.dup
134
- early_exit = false
135
- any_done = false
136
- ranges = [
137
- ('0'[0]..'9'[0]),
138
- ('a'[0]..'z'[0]),
139
- ('A'[0]..'Z'[0]),
140
- nil
141
- ]
142
-
143
- # Search forward for the first in-range character. If found check
144
- # to see if that character is "1", "a" or "A". If it is, record
145
- # its index (from 0 to string length - 1). We'll need this if
146
- # underflows wrap as far as the found byte because in that case
147
- # this first found byte should be deleted ("aa..." -> "z...",
148
- # "10..." -> "9...").
149
-
150
- first_ranged = nil
151
-
152
- for index in (1..str.length)
153
- byte = str[index - 1]
154
-
155
- # Determine whether or not the current byte is a number, lower case
156
- # or upper case letter. We expect 'select' to only find one matching
157
- # array entry in 'ranges', thus we dereference index 0 after the
158
- # 'end' to put a matching range from within 'ranges' into 'within',
159
- # or 'nil' for any unmatched byte.
160
-
161
- within = ranges.select do |range|
162
- range.nil? or range.include?(byte)
163
- end [0]
164
-
165
- unless within.nil?
166
- case within.first
167
- when '0'[0]
168
- match_byte = '1'[0]
169
- else
170
- match_byte = within.first
171
- end
172
-
173
- first_ranged = index - 1 if (byte == match_byte)
174
- first_within = within
175
- break
176
- end
177
- end
178
-
179
- for index in (1..str.length)
180
-
181
- # Process the input string in reverse character order - fetch the
182
- # bytes via negative index.
183
-
184
- byte = str[-index]
185
-
186
- within = ranges.select do |range|
187
- range.nil? or range.include?(byte)
188
- end [0]
189
-
190
- # Skip this letter unless within a known range. Otherwise note that
191
- # at least one byte was able to be processed.
192
-
193
- next if within.nil?
194
- any_done = true
195
-
196
- # Decrement the current byte. If it is still within its range, set
197
- # the byte and bail out - we're finished. Flag the early exit. If
198
- # the byte is no longer within range, wrap the character around
199
- # and continue the loop to carry the decrement to an earlier byte.
200
-
201
- byte = byte - 1
202
-
203
- if (within.include? byte)
204
- str[-index] = byte
205
- early_exit = true
206
- break
207
- else
208
- str[-index] = within.last
209
-
210
- # If we've just wrapped around a character immediately after the
211
- # one found right at the start ('0', 'a' or 'A') then this first
212
- # ranged character should be deleted (so "10" -> "09"
213
-
214
- if (first_ranged != nil and first_within.include?(byte + 1) and (first_ranged - str.length) == -(index + 1))
215
- str.slice!(-(index + 1))
216
- early_exit = true
217
- break
218
- end
219
- end
220
-
221
- end # From outer 'for' loop
222
-
223
- # If we did process at least one byte but we did not exit early, then
224
- # the loop completed due to carrying a decrement to other bytes. This
225
- # means an underflow condition - return 'nil'.
226
-
227
- if (any_done == true and early_exit == false)
228
- return nil
229
- else
230
- return str
231
- end
232
- end
233
-
234
- # As (extended) String::pred / String::prev, but modifies the string in
235
- # place rather than returning a copy. If underflow occurs, the string
236
- # will be unchanged. Returns 'self'.
237
- #
238
- def prev!
239
- new_str = prev
240
- self.replace(new_str) unless new_str.nil?
241
- return self
242
- end
243
-
244
- alias pred prev
245
- alias pred! prev!
246
-
1
+ #!/usr/bin/ruby
2
+
3
+ def File.ext_wo_name(what)
4
+ return what if what.rindex(".") == nil
5
+ what = File.basename(what)
6
+ "#{what}"[what.rindex(".")+1..-1]
7
+ end
8
+
9
+ def File.name_wo_ext(what)
10
+ return what if what.rindex(".") == nil
11
+ what = File.basename(what)
12
+ "#{what}"[0...what.rindex(".")]
13
+ end
14
+
15
+ class Float
16
+ def round_to(x)
17
+ (self * 10**x).round.to_f / 10**x
18
+ end
19
+
20
+ def cut_to(x)
21
+ (self.abs * 10**x).floor.to_f * (self == 0.0 ? 0 : self/self.abs).round / 10**x
22
+ end
23
+ end
24
+
25
+ class Array
26
+ def shuffle
27
+ arr = self.dup
28
+ arr.size.downto 2 do |j|
29
+ r = rand j
30
+ arr[j-1], arr[r] = arr[r], arr[j-1]
31
+ end
32
+ arr
33
+ end
34
+
35
+ def shuffle!
36
+ (size - 1).downto 1 do |i|
37
+ j = rand(i + 1)
38
+ self[i], self[j] = self[j], self[i]
39
+ end
40
+ self
41
+ end
42
+
43
+ def average
44
+ self.empty? ? nil : self.inject(0) { |sum,s| sum += s } / self.size
45
+ end
46
+ alias mean average
47
+
48
+ def variance
49
+ return self.collect { |s| s*s }.average - average**2
50
+ end
51
+
52
+ def sum
53
+ self.inject(self[0]) { |sum,s| sum += s} - self[0]
54
+ end
55
+
56
+ end
57
+
58
+ class String
59
+
60
+ def compl!
61
+ self.tr!("acgtACGT", "tgcaTGCA")
62
+ return self
63
+ end
64
+
65
+ def compl
66
+ return self.tr("acgtACGT", "tgcaTGCA")
67
+ end
68
+
69
+ alias comp! compl!
70
+ alias complement! compl!
71
+ alias comp compl
72
+ alias complement compl
73
+
74
+ def revcomp
75
+ return comp.reverse
76
+ end
77
+
78
+ def revcomp!
79
+ return comp!.reverse!
80
+ end
81
+
82
+ def to_id
83
+ return self.gsub(/[^.\w]/, '_').upcase
84
+ end
85
+
86
+ end
87
+
88
+ # Also this can be done is a more sophisticated way
89
+ =begin
90
+ String.class_eval do
91
+ def to_id
92
+ return self.gsub(/[^.\w]/, '_')
93
+ end
94
+ end
95
+ =end
96
+
97
+ class String
98
+ # The opposite of String::next / String::succ. It is impossible to be a
99
+ # *complete* opposite because both "9".next = "10" and "09".next = "10";
100
+ # if going backwards from "10" there's no way to know whether the result
101
+ # should be "09" or "9". Where the first ranged character is about to
102
+ # underflow and the next character is within the same range the result
103
+ # is shrunk down - that is, "10" goes to "9", "aa" goes to "z"; any non-
104
+ # range prefix or suffix is OK, e.g. "+!$%10-=+" goes to "+!$%9-=+".
105
+ # Items in the middle of a string don't do this - e.g. "12.10" goes to
106
+ # "12.09", to match how "next" would work as best as possible.
107
+ #
108
+ # The standard "next" function works on strings that contain *no*
109
+ # alphanumeric characters, using character codes. This implementation
110
+ # of "prev" does *not* work on such strings - while strings may contain
111
+ # any characters you like, only the alphanumeric components are operated
112
+ # upon.
113
+ #
114
+ # Should total underflow result, "nil" will be returned - e.g. "00".prev
115
+ # returns 'nil', as does "a".prev. This is done even if there are other
116
+ # characters in the string that were not touched - e.g. "+0.0".prev
117
+ # also returns "nil". Broadly speaking, a "nil" return value is used for
118
+ # any attempt to find the previous value of a string that could not have
119
+ # been generated using "next" in the first place.
120
+ #
121
+ # As with "next" sometimes the result of "prev" can be a little obscure
122
+ # so it is often best to try out things using "irb" if unsure. Note in
123
+ # particular that software revision numbers do not necessarily behave
124
+ # predictably, because they don't with "next"! E.g. "12.4.9" might go to
125
+ # "12.4.10" for a revision number, but "12.4.9".next = "12.5.0". Thus
126
+ # "12.5.0".prev = "12.4.9" and "12.4.10".prev = "12.4.09" (because the
127
+ # only way to make "12.4.10" using "next" is to start at "12.4.09").
128
+ #
129
+ # Since 'succ' (successor) is an alias for 'next', so 'pred'
130
+ # (predecessor) is an alias for 'prev'.
131
+ #
132
+ def prev(collapse = false)
133
+ str = self.dup
134
+ early_exit = false
135
+ any_done = false
136
+ ranges = [
137
+ ('0'[0]..'9'[0]),
138
+ ('a'[0]..'z'[0]),
139
+ ('A'[0]..'Z'[0]),
140
+ nil
141
+ ]
142
+
143
+ # Search forward for the first in-range character. If found check
144
+ # to see if that character is "1", "a" or "A". If it is, record
145
+ # its index (from 0 to string length - 1). We'll need this if
146
+ # underflows wrap as far as the found byte because in that case
147
+ # this first found byte should be deleted ("aa..." -> "z...",
148
+ # "10..." -> "9...").
149
+
150
+ first_ranged = nil
151
+
152
+ for index in (1..str.length)
153
+ byte = str[index - 1]
154
+
155
+ # Determine whether or not the current byte is a number, lower case
156
+ # or upper case letter. We expect 'select' to only find one matching
157
+ # array entry in 'ranges', thus we dereference index 0 after the
158
+ # 'end' to put a matching range from within 'ranges' into 'within',
159
+ # or 'nil' for any unmatched byte.
160
+
161
+ within = ranges.select do |range|
162
+ range.nil? or range.include?(byte)
163
+ end [0]
164
+
165
+ unless within.nil?
166
+ case within.first
167
+ when '0'[0]
168
+ match_byte = '1'[0]
169
+ else
170
+ match_byte = within.first
171
+ end
172
+
173
+ first_ranged = index - 1 if (byte == match_byte)
174
+ first_within = within
175
+ break
176
+ end
177
+ end
178
+
179
+ for index in (1..str.length)
180
+
181
+ # Process the input string in reverse character order - fetch the
182
+ # bytes via negative index.
183
+
184
+ byte = str[-index]
185
+
186
+ within = ranges.select do |range|
187
+ range.nil? or range.include?(byte)
188
+ end [0]
189
+
190
+ # Skip this letter unless within a known range. Otherwise note that
191
+ # at least one byte was able to be processed.
192
+
193
+ next if within.nil?
194
+ any_done = true
195
+
196
+ # Decrement the current byte. If it is still within its range, set
197
+ # the byte and bail out - we're finished. Flag the early exit. If
198
+ # the byte is no longer within range, wrap the character around
199
+ # and continue the loop to carry the decrement to an earlier byte.
200
+
201
+ byte = byte - 1
202
+
203
+ if (within.include? byte)
204
+ str[-index] = byte
205
+ early_exit = true
206
+ break
207
+ else
208
+ str[-index] = within.last
209
+
210
+ # If we've just wrapped around a character immediately after the
211
+ # one found right at the start ('0', 'a' or 'A') then this first
212
+ # ranged character should be deleted (so "10" -> "09"
213
+
214
+ if (first_ranged != nil and first_within.include?(byte + 1) and (first_ranged - str.length) == -(index + 1))
215
+ str.slice!(-(index + 1))
216
+ early_exit = true
217
+ break
218
+ end
219
+ end
220
+
221
+ end # From outer 'for' loop
222
+
223
+ # If we did process at least one byte but we did not exit early, then
224
+ # the loop completed due to carrying a decrement to other bytes. This
225
+ # means an underflow condition - return 'nil'.
226
+
227
+ if (any_done == true and early_exit == false)
228
+ return nil
229
+ else
230
+ return str
231
+ end
232
+ end
233
+
234
+ # As (extended) String::pred / String::prev, but modifies the string in
235
+ # place rather than returning a copy. If underflow occurs, the string
236
+ # will be unchanged. Returns 'self'.
237
+ #
238
+ def prev!
239
+ new_str = prev
240
+ self.replace(new_str) unless new_str.nil?
241
+ return self
242
+ end
243
+
244
+ alias pred prev
245
+ alias pred! prev!
246
+
247
247
  end
@@ -1,71 +1,71 @@
1
- #!/usr/bin/ruby
2
-
3
- module Ytilib
4
- require "rexml/document"
5
- include REXML
6
-
7
- class Bismark < Document
8
-
9
- def initialize(source = nil, add_dtd = false)
10
- dtd = add_dtd ? "<!DOCTYPE smallbismark SYSTEM 'smallbismark.dtd'>#{$/}" : ""
11
- source == nil ? super("<?xml version='1.0' encoding='UTF-8'?>#{$/}#{dtd}") : super(source)
12
- super(IO.read(source)) if source != nil && root == nil
13
- if source == nil
14
- self.add_element("smallbismark")
15
- # xmlns breaks XPath for a REXML library under Linux, strange, indeed
16
- # self.add_element("smallbismark", {"xmlns" => "http://bioinform.imb.ac.ru/smallBiSMark/smallbismark.dtd"})
17
- self.root.add_element("comment", {"name" => "WARNING"}).add_text("This is a draft version of small-BiSMark. Specification is the subject to change!")
18
- end
19
- end
20
-
21
- def getXML
22
- beautify
23
- s = ""; write(s, 1, true)
24
- s.rstrip!
25
- return s
26
- end
27
- alias get_xml getXML
28
-
29
- def get_pm(xpath)
30
- pwmnode = self.elements[xpath]
31
- pm = PM.new_pm(pwmnode.attribute("length").value.to_i)
32
- toi = pwmnode.name == "PCM"
33
- pwmnode.elements.each("pm-column") { |c|
34
- position = c.attribute("position").value.to_i - 1
35
- weights = [c.elements["a"].get_text.value.strip.to_f,
36
- c.elements["c"].get_text.value.strip.to_f,
37
- c.elements["g"].get_text.value.strip.to_f,
38
- c.elements["t"].get_text.value.strip.to_f]
39
- weights.collect { |w| w.to_i } if toi
40
- pm['A'][position], pm['C'][position], pm['G'][position], pm['T'][position] = weights[0], weights[1], weights[2], weights[3]
41
- }
42
- return pm
43
- end
44
-
45
- private
46
- CONTAIN_NO_TEXT = {
47
- "segment" => :vasya_shmyak,
48
- "group" => :vasya_shmyak,
49
- "smallbismark" => :vasya_shmyak,
50
- "motif" => :vasya_shmyak,
51
- "PWM" => :vasya_shmyak,
52
- "PCM" => :vasya_shmyak,
53
- "PPM" => :vasya_shmyak,
54
- "source" => :vasya_shmyak,
55
- "factor" => :vasya_shmyak,
56
- "pm-column" => :vasya_shmyak,
57
- "word-list" => :vasya_shmyak}
58
-
59
- def beautify(node = self)
60
- if node == self
61
- self.delete_if { |e| e.is_a?(Text) }
62
- self.each { |e| beautify(e) }
63
- else
64
- node.delete_if { |e| e.is_a?(Text) } if node.respond_to?(:delete_if) && Bismark::CONTAIN_NO_TEXT.has_key?(node.name)
65
- node.each { |e| beautify(e) } if node.respond_to?(:each)
66
- end
67
- end
68
-
69
- end
70
-
1
+ #!/usr/bin/ruby
2
+
3
+ module Ytilib
4
+ require "rexml/document"
5
+ include REXML
6
+
7
+ class Bismark < Document
8
+
9
+ def initialize(source = nil, add_dtd = false)
10
+ dtd = add_dtd ? "<!DOCTYPE smallbismark SYSTEM 'smallbismark.dtd'>#{$/}" : ""
11
+ source == nil ? super("<?xml version='1.0' encoding='UTF-8'?>#{$/}#{dtd}") : super(source)
12
+ super(IO.read(source)) if source != nil && root == nil
13
+ if source == nil
14
+ self.add_element("smallbismark")
15
+ # xmlns breaks XPath for a REXML library under Linux, strange, indeed
16
+ # self.add_element("smallbismark", {"xmlns" => "http://bioinform.imb.ac.ru/smallBiSMark/smallbismark.dtd"})
17
+ self.root.add_element("comment", {"name" => "WARNING"}).add_text("This is a draft version of small-BiSMark. Specification is the subject to change!")
18
+ end
19
+ end
20
+
21
+ def getXML
22
+ beautify
23
+ s = ""; write(s, 1, true)
24
+ s.rstrip!
25
+ return s
26
+ end
27
+ alias get_xml getXML
28
+
29
+ def get_pm(xpath)
30
+ pwmnode = self.elements[xpath]
31
+ pm = PM.new_pm(pwmnode.attribute("length").value.to_i)
32
+ toi = pwmnode.name == "PCM"
33
+ pwmnode.elements.each("pm-column") { |c|
34
+ position = c.attribute("position").value.to_i - 1
35
+ weights = [c.elements["a"].get_text.value.strip.to_f,
36
+ c.elements["c"].get_text.value.strip.to_f,
37
+ c.elements["g"].get_text.value.strip.to_f,
38
+ c.elements["t"].get_text.value.strip.to_f]
39
+ weights.collect { |w| w.to_i } if toi
40
+ pm['A'][position], pm['C'][position], pm['G'][position], pm['T'][position] = weights[0], weights[1], weights[2], weights[3]
41
+ }
42
+ return pm
43
+ end
44
+
45
+ private
46
+ CONTAIN_NO_TEXT = {
47
+ "segment" => :vasya_shmyak,
48
+ "group" => :vasya_shmyak,
49
+ "smallbismark" => :vasya_shmyak,
50
+ "motif" => :vasya_shmyak,
51
+ "PWM" => :vasya_shmyak,
52
+ "PCM" => :vasya_shmyak,
53
+ "PPM" => :vasya_shmyak,
54
+ "source" => :vasya_shmyak,
55
+ "factor" => :vasya_shmyak,
56
+ "pm-column" => :vasya_shmyak,
57
+ "word-list" => :vasya_shmyak}
58
+
59
+ def beautify(node = self)
60
+ if node == self
61
+ self.delete_if { |e| e.is_a?(Text) }
62
+ self.each { |e| beautify(e) }
63
+ else
64
+ node.delete_if { |e| e.is_a?(Text) } if node.respond_to?(:delete_if) && Bismark::CONTAIN_NO_TEXT.has_key?(node.name)
65
+ node.each { |e| beautify(e) } if node.respond_to?(:each)
66
+ end
67
+ end
68
+
69
+ end
70
+
71
71
  end