sequence_logo 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby
1
+ #!/usr/bin/ruby
2
2
  module Ytilib
3
3
  class PM
4
4
  def score_sigma(trycount = 4**10, approx = false, bg = nil)
@@ -0,0 +1,85 @@
1
# Core extension: adds a deep-copy helper to every object.
class Object
  # Returns a copy of the receiver produced by a Marshal dump/load round trip,
  # so nested objects are duplicated too instead of being shared.
  # Objects that Marshal cannot dump make Marshal.dump raise.
  def deep_dup
    snapshot = Marshal.dump(self)
    Marshal.load(snapshot)
  end
end
6
+
7
# Builds a matrix from +in_file_name+ and returns the result of its #get_ppm.
#
# The loader is chosen by File.ext_wo_name(in_file_name):
#   'pat', 'pcm'           -> PM.load (followed by #fixwc when words_count is set)
#   'mfa', 'fasta', 'plain' -> PM.new_pcm over Ytilib.read_seqs2array
#   'xml'                  -> PM.from_bismark on the "//PPM" element
#   the argument itself    -> PPM.from_IUPAC(in_file_name.upcase)
# NOTE(review): the last branch only fires when File.ext_wo_name returns the
# whole argument - presumably for a bare IUPAC word rather than a file name;
# confirm against File.ext_wo_name.
#
# Returns nil when no branch matches or when any StandardError is raised
# while loading (the method-level rescue swallows it).
def get_ppm_from_file(in_file_name)
  extension = File.ext_wo_name(in_file_name)
  pm =
    case extension
    when 'pat', 'pcm'
      loaded = PM.load(in_file_name)
      loaded.fixwc if loaded.words_count
      loaded
    when 'mfa', 'fasta', 'plain'
      PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
    when 'xml'
      PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
    when in_file_name
      PPM.from_IUPAC(in_file_name.upcase)
    end
  pm.get_ppm
rescue
  nil
end
23
+
24
# Sequence-logo helpers for PPM. Relies on @matrix (a hash with 'A', 'C', 'G'
# and 'T' keys, each holding one value per position) and on icd4of4, infocod,
# words_count and revcomp!, none of which are defined in this block.
class PPM
  attr_accessor :name

  # Returns the receiver itself.
  def get_ppm
    self
  end

  # Absolute relative distance of +v+ from icd4of4: |(v - icd4of4) / icd4of4|.
  # There is no guard against icd4of4 being zero here.
  def get_line(v)
    offset = v - icd4of4
    (offset / icd4of4).abs
  end

  # Returns the logo matrix for +icd_mode+ ('weblogo' or 'discrete', compared
  # after to_s); any other mode yields nil.
  def get_logo(icd_mode)
    mode = icd_mode.to_s
    if mode == 'weblogo'
      get_logo_weblogo
    elsif mode == 'discrete'
      get_logo_discrete
    end
  end


  # Column height is 2 + sum over letters of p * log2(p), with zero entries
  # contributing nothing. Every matrix value is multiplied by its column height
  # and halved, which maps the 2-bit scale onto 0..1.
  # Returns a hash 'A'/'C'/'G'/'T' => array of scaled values.
  def get_logo_weblogo
    letters = ['A', 'C', 'G', 'T']

    heights = @matrix['A'].each_index.map do |pos|
      terms = letters.map do |letter|
        pn = @matrix[letter][pos]
        (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
      end
      2 + terms.inject(0) { |acc, term| acc + term }
    end

    scaled = {}
    letters.each do |letter|
      scaled[letter] = heights.each_with_index.map do |height, pos|
        @matrix[letter][pos] * height / 2 # so we can handle a '2 bit' scale here
      end
    end
    scaled
  end

  # Column factor is |(infocod(i) - icd4of4) / icd4of4|, or 1.0 when icd4of4
  # is zero. Every matrix value is multiplied by its column factor.
  # Goes through checkerr("words count is undefined") when words_count is unset.
  # Returns a hash 'A'/'C'/'G'/'T' => array of scaled values.
  def get_logo_discrete
    checkerr("words count is undefined") { !words_count }

    factors = @matrix['A'].each_index.map do |pos|
      if icd4of4 == 0
        1.0
      else
        ((infocod(pos) - icd4of4) / icd4of4).abs
      end
    end

    scaled = {}
    ['A', 'C', 'G', 'T'].each do |letter|
      scaled[letter] = factors.each_with_index.map do |factor, pos|
        @matrix[letter][pos] * factor
      end
    end
    scaled
  end

  # Non-destructive counterpart of revcomp!: applies it to a deep copy.
  def revcomp
    copy = deep_dup
    copy.revcomp!
  end
end
@@ -1,131 +1,131 @@
1
- #!/usr/bin/ruby
2
- module Ytilib
3
-
4
- srand
5
-
6
- module Randoom
7
-
8
- private
9
-
10
- def Randoom.new_counts
11
- { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
12
- end
13
-
14
- def Randoom.random_letter(probs)
15
- random = rand()
16
- return 'A' if random < probs['A']
17
- return 'C' if random < probs['A'] + probs['C']
18
- return 'G' if random < probs['A'] + probs['C'] + probs['G']
19
- return 'T'
20
- end
21
-
22
- public
23
-
24
- def Randoom.calc_probs(input)
25
- counts = new_counts
26
- counts.default = 0
27
- (0...input.length).each { |i|
28
- counts[input[i,1].upcase] += 1
29
- }
30
- return make_probs!(counts)
31
- end
32
-
33
- def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
34
- randoom = ''
35
- if (probs_m == nil)
36
- req_len.times { randoom << random_letter(probs) }
37
- return randoom
38
- end
39
- random_l = random_letter(probs)
40
- randoom = random_l
41
- (req_len-1).times {
42
- cur_probs = probs_m[random_l]
43
- random_l = random_letter(cur_probs)
44
- randoom << random_l
45
- }
46
- return randoom
47
- end
48
-
49
- def Randoom.calc_probs_m(input)
50
- probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
51
- counts = { 'A' => new_counts, 'C' => new_counts, 'G' => new_counts, 'T' => new_counts, 'N' => new_counts }
52
- (0...input.length-1).each { |i|
53
- pair = input[i, 2].upcase
54
- counts[pair[0,1]][pair[1,1]] += 1
55
- }
56
- probs_m['A'] = make_probs!(counts['A'])
57
- probs_m['C'] = make_probs!(counts['C'])
58
- probs_m['G'] = make_probs!(counts['G'])
59
- probs_m['T'] = make_probs!(counts['T'])
60
- return probs_m
61
- end
62
-
63
- def Randoom.make_probs_m!(counts)
64
- ['A','C','G','T','N'].each { |l2|
65
- addv = counts['N'][l2] / 4.0
66
- ['A','C','G','T'].each { |l1|
67
- counts[l1][l2] += addv
68
- }
69
- }
70
-
71
- probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
72
- probs_m['A'] = make_probs!(counts['A'])
73
- probs_m['C'] = make_probs!(counts['C'])
74
- probs_m['G'] = make_probs!(counts['G'])
75
- probs_m['T'] = make_probs!(counts['T'])
76
- return probs_m
77
- end
78
-
79
- def Randoom.make_probs!(counts, length = nil)
80
- probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
81
- length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length == nil
82
- length = length.to_f
83
- ['A','C','G','T'].each { |l| counts[l] += counts['N'] / 4.0 }
84
- return probs if length == 0
85
- probs['A'] = counts['A'] / length
86
- probs['C'] = counts['C'] / length
87
- probs['G'] = counts['G'] / length
88
- probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
89
- return probs
90
- end
91
-
92
- def Randoom.equalize!(probs)
93
- probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
94
- probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
95
- return probs
96
- end
97
-
98
- def Randoom.twostrand!(probs)
99
- return Randoom.equalize!(probs)
100
- end
101
-
102
- DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })
103
-
104
- # probabilities counted without _random.fa files for human genome
105
- DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
106
- DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}
107
-
108
- DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
109
- "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
110
- "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
111
- "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}
112
-
113
- HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
114
- HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}
115
-
116
- HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
117
- "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
118
- "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
119
- "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}
120
-
121
- HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
122
- HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}
123
-
124
- MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
125
- MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}
126
-
127
- MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}
128
-
129
- end
130
-
131
- end
1
+ #!/usr/bin/ruby
2
+ module Ytilib
3
+
4
+ srand
5
+
6
+ module Randoom
7
+
8
+ private
9
+
10
# Returns a fresh tally hash with a zero counter for each of A, C, G, T and N.
def Randoom.new_counts
  %w[A C G T N].each_with_object({}) { |letter, tally| tally[letter] = 0 }
end
13
+
14
# Draws one letter according to +probs+ (hash letter => probability).
# A single rand() value is compared with the running sums A, A+C, A+C+G;
# 'T' is returned when none of them exceeds it, so probs['T'] is never read.
def Randoom.random_letter(probs)
  draw = rand()
  threshold = 0
  %w[A C G].each do |letter|
    threshold += probs[letter]
    return letter if draw < threshold
  end
  'T'
end
21
+
22
+ public
23
+
24
# Letter probabilities of the string +input+ (case-insensitive).
# Characters other than A/C/G/T/N are tallied under their own key thanks to
# the zero default, and make_probs! does not read those keys.
def Randoom.calc_probs(input)
  tally = new_counts
  tally.default = 0
  input.each_char { |char| tally[char.upcase] += 1 }
  make_probs!(tally)
end
32
+
33
# Generates a random sequence of +req_len+ letters.
#
# req_len - requested length; zero or a negative value gives an empty string.
# probs   - hash letter => probability used for independent draws, and for
#           the first letter in Markov mode.
# probs_m - optional nested hash (previous letter => probs hash). When given,
#           every letter after the first is drawn from the row selected by
#           the letter before it.
#
# Returns a new String.
def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
  # Fix: the Markov branch used to emit its first letter unconditionally,
  # so a request for zero letters still returned one.
  return '' if req_len <= 0

  if probs_m.nil?
    sequence = ''
    req_len.times { sequence << random_letter(probs) }
    return sequence
  end

  current = random_letter(probs)
  sequence = current.dup # keep the buffer separate from the "previous letter"
  (req_len - 1).times do
    current = random_letter(probs_m[current])
    sequence << current
  end
  sequence
end
48
+
49
# Estimates next-letter probabilities from adjacent letter pairs of +input+
# (case-insensitive).
#
# Returns a hash 'A'/'C'/'G'/'T' => probs hash, each row produced by
# make_probs! from the counts of letters that follow that row's letter.
# Pairs whose first letter is 'N' are tallied but, as before, not used in the
# result (make_probs_m! is not called here).
#
# Fix: a letter outside A/C/G/T/N used to raise NoMethodError on a missing
# row. Such letters now get throwaway rows and cells that the result never
# reads, which matches how calc_probs tolerates them through its zero default.
def Randoom.calc_probs_m(input)
  counts = Hash.new do |rows, letter|
    row = new_counts
    row.default = 0
    rows[letter] = row
  end

  (0...input.length - 1).each do |i|
    pair = input[i, 2].upcase
    counts[pair[0, 1]][pair[1, 1]] += 1
  end

  probs_m = {}
  %w[A C G T].each { |letter| probs_m[letter] = make_probs!(counts[letter]) }
  probs_m
end
62
+
63
# Turns a table of pair counts (first letter => second letter => count) into
# a hash 'A'/'C'/'G'/'T' => probs hash. Mutates +counts+.
#
# For every second letter (N included), a quarter of the count recorded in the
# 'N' row is added to each of the A, C, G and T rows; each of those rows is
# then normalised with make_probs!.
def Randoom.make_probs_m!(counts)
  bases = ['A', 'C', 'G', 'T']

  (bases + ['N']).each do |second|
    share = counts['N'][second] / 4.0
    bases.each { |first| counts[first][second] += share }
  end

  result = {}
  bases.each { |first| result[first] = make_probs!(counts[first]) }
  result
end
78
+
79
# Converts letter counts into probabilities for A, C, G and T. Mutates +counts+.
#
# counts - hash with 'A', 'C', 'G', 'T' and 'N' counters.
# length - divisor; defaults to the sum of the five counters, taken before
#          the N counter is redistributed.
#
# A quarter of counts['N'] is added to each of the four base counters (this
# happens even when the divisor turns out to be zero). With a zero divisor
# the result is all integer zeros; otherwise A, C and G are count / length
# and T is 1 minus the other three.
def Randoom.make_probs!(counts, length = nil)
  bases = %w[A C G T]

  if length.nil?
    length = bases.map { |base| counts[base] }.inject(:+) + counts['N']
  end
  total = length.to_f

  bases.each { |base| counts[base] += counts['N'] / 4.0 }

  return { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 } if total == 0

  probs = {}
  %w[A C G].each { |base| probs[base] = counts[base] / total }
  probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
  probs
end
91
+
92
# Replaces the A and T entries of +probs+ with their mean, and likewise the
# C and G entries. Mutates and returns +probs+.
def Randoom.equalize!(probs)
  [['A', 'T'], ['C', 'G']].each do |first, second|
    mean = (probs[first] + probs[second]) / 2
    probs[second] = mean
    probs[first] = mean
  end
  probs
end
97
+
98
# Alias-style wrapper: delegates to equalize! and returns its result.
def Randoom.twostrand!(probs)
  equalize!(probs)
end
101
+
102
# Default letter probabilities used by rand_seq: 0.25 for each of A, C, G, T
# (and N), passed through PPM.probs2IUPAC!, which is defined outside this file.
# NOTE(review): confirm what probs2IUPAC! adds to the hash.
+ DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })
103
+
104
# Naming pattern of the tables below:
#   *_PROBS1   - per-letter probabilities.
#   *_PROBS2   - the same table with A/T and C/G replaced by their means
#                (the values agree with applying equalize! to *_PROBS1).
#   *_PROBS1_M - nested hash in the shape rand_seq takes as probs_m:
#                previous letter => { next letter => probability }.
# NOTE(review): the prefixes (DMEL40, HG17, HG18, MM9) presumably name genome
# assemblies; the comment below mentions only the human genome - confirm.
+ # probabilities counted without _random.fa files for human genome
105
+ DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
106
+ DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}
107
+
108
+ DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
109
+ "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
110
+ "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
111
+ "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}
112
+
113
+ HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
114
+ HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}
115
+
116
+ HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
117
+ "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
118
+ "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
119
+ "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}
120
+
121
+ HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
122
+ HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}
123
+
124
+ MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
125
+ MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}
126
+
127
+ MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}
128
+
129
+ end
130
+
131
+ end
@@ -1,147 +1,147 @@
1
- module Ytilib
2
- def Ytilib.time
3
- return Time.now.strftime('%d %b %H:%M:%S')
4
- end
5
- end
6
-
7
- $program_name = nil
8
-
9
- def start(fullpath)
10
- report(fullpath + ARGV.inject("") { |out, v| out += " " + v})
11
- return if $NO_REPORT
12
- $program_name = "[#{File.name_wo_ext(fullpath)}]"
13
- end
14
-
15
- def report(message, program_name = nil)
16
- $program_name = "[#{program_name}]" if program_name != nil
17
- return if $NO_REPORT
18
- puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if !block_given? || yield
19
- end
20
-
21
- def checkerr(message = "checkerr failed")
22
- if !block_given? || yield
23
- puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
24
- raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
25
- end
26
- end
27
-
28
- module Ytilib
29
-
30
- STRAND_DIRECT = "direct"
31
- STRAND_REVCOMP = "revcomp"
32
-
33
- def Ytilib.read_mfa2hash(path)
34
- input_fasta_f = File.new(path, "r")
35
- seqs, seq_name = {}, nil
36
- input_fasta_f.each_line { |line|
37
- if line[0,1] == ">"
38
- seq_name = line[1..-1].strip
39
- seq_name = yield seq_name if block_given?
40
- checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
41
- seqs[seq_name] = ""
42
- elsif seq_name != nil
43
- seqs[seq_name] << line.strip
44
- end
45
- }
46
- input_fasta_f.close
47
- return seqs
48
- end
49
-
50
- def Ytilib.read_mfa2array(path)
51
- input_fasta_f = File.new(path, "r")
52
- seqs, seq_name = [], nil
53
- input_fasta_f.each_line { |line|
54
- if line[0,1] == ">"
55
- seq_name = line[1..-1].strip
56
- yield seq_name if block_given?
57
- seqs << ""
58
- elsif seq_name != nil
59
- seqs.last << line.strip
60
- end
61
- }
62
- input_fasta_f.close
63
- return seqs
64
- end
65
-
66
- def Ytilib.mfa2array(input)
67
- seqs, seq_name = [], nil
68
- input.each_line { |line|
69
- if line[0,1] == ">"
70
- seq_name = line[1..-1].strip
71
- seqs << ""
72
- elsif seq_name != nil
73
- seqs.last << line.strip
74
- end
75
- }
76
- return seqs
77
- end
78
-
79
- def Ytilib.read_plain2array(path)
80
- array = []
81
- File.open(path).each_line { |line|
82
- array << line.strip if !line.strip.empty?
83
- }
84
- return array
85
- end
86
-
87
- def Ytilib.read_seqs2array(path)
88
- type = File.ext_wo_name(path)
89
- case type
90
- when "mfa", "fasta", "fa"
91
- return Ytilib.read_mfa2array(path)
92
- when "plain","txt"
93
- return Ytilib.read_plain2array(path)
94
- else
95
- checkerr("unknown sequences-file, ext=#{type}")
96
- end
97
- end
98
-
99
- def Ytilib.write_mfa(seqs, path, prefix = " ")
100
- if seqs.is_a?(Hash)
101
- out_fasta_f = File.new(path, "w+")
102
- seqs.each_key { |name|
103
- out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
104
- }
105
- out_fasta_f.close
106
- else
107
- out_fasta_f = File.new(path, "w+")
108
- seqs.each_with_index { |seq, i|
109
- out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
110
- }
111
- out_fasta_f.close
112
- end
113
- end
114
-
115
- def get_consensus(seqs)
116
- report "consensus creating method should be checked, you are using unsafe code"
117
- return 'nil' if seqs.size == 0
118
- conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
119
- 'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
120
- 'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
121
- }
122
- new_consensus, letters = '', []
123
- 0.upto(seqs[0].size-1) { |i|
124
- seqs.each do |word|
125
- letters << word[i] if !letters.include?(word[i])
126
- end
127
- letters.sort!
128
- letters_string = ''
129
- letters.each do |letter| letters_string << letter end
130
- checkerr("cannot find consensus letter for a given letter set :#{}") { conslet[letters_string] == nil }
131
- new_consensus << conslet[letters_string]
132
- letters.clear
133
- }
134
- return new_consensus
135
- end
136
-
137
- def Ytilib.new_mysql_conn(database)
138
- my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
139
- checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
140
- return my
141
- end
142
-
143
-
144
- end
145
-
146
- report "ytilib required, working directory #{Dir.pwd}", "ytilib"
1
module Ytilib
  # Current local time rendered as "DD Mon HH:MM:SS" (e.g. "07 Mar 14:05:09");
  # report and checkerr put it in their log lines.
  def self.time
    now = Time.now
    now.strftime('%d %b %H:%M:%S')
  end
end
6
+
7
+ $program_name = nil
8
+
9
# Reports the invocation (+fullpath+ followed by every ARGV entry, each
# preceded by a space) and then, unless $NO_REPORT is set, stores
# "[<File.name_wo_ext(fullpath)>]" in $program_name.
def start(fullpath)
  arguments = ARGV.map { |arg| " " + arg }.join
  report(fullpath + arguments)
  return if $NO_REPORT
  $program_name = "[#{File.name_wo_ext(fullpath)}]"
end
14
+
15
# Prints a log line "LLIB <time> <$program_name>\t<message>" to stdout.
#
# program_name - when given, $program_name is first set to "[program_name]"
#                (this happens even if reporting is silenced).
# Nothing is printed when $NO_REPORT is set, or when a block is given and it
# returns a falsy value. Always returns nil.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return if $NO_REPORT
  return unless !block_given? || yield
  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}"
end
20
+
21
# Raises a RuntimeError carrying +message+ when called without a block or
# when the block returns a truthy value; otherwise does nothing and returns nil.
# Before raising, an "[error]" line is printed unless $NO_REPORT is set.
def checkerr(message = "checkerr failed")
  failed = !block_given? || yield
  return unless failed

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
27
+
28
# Sequence-file helpers (multi-FASTA and plain text readers/writers), a
# consensus builder and a MySQL connection helper.
#
# Changes from the previous version: every file is opened in block form so
# the handle is closed even when a caller's block or checkerr raises
# (read_plain2array never closed its file at all), and the get_consensus
# error message now names the offending letter set.
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a hash name => sequence.
  # A line starting with ">" opens a record; its stripped remainder is the
  # name, replaced by the block's return value when a block is given.
  # Following lines are stripped and appended; lines before the first header
  # are ignored. A repeated name goes through checkerr (which raises).
  def Ytilib.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    File.open(path, "r") do |input_fasta_f|
      input_fasta_f.each_line do |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          seq_name = yield seq_name if block_given?
          checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
          seqs[seq_name] = ""
        elsif seq_name != nil
          seqs[seq_name] << line.strip
        end
      end
    end
    seqs
  end

  # Reads a multi-FASTA file into an array of sequences in file order.
  # Each header name is yielded to the block when one is given (the block's
  # value is not used). Lines before the first header are ignored.
  def Ytilib.read_mfa2array(path)
    seqs, seq_name = [], nil
    File.open(path, "r") do |input_fasta_f|
      input_fasta_f.each_line do |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          yield seq_name if block_given?
          seqs << ""
        elsif seq_name != nil
          seqs.last << line.strip
        end
      end
    end
    seqs
  end

  # Same parsing as read_mfa2array, applied to an in-memory +input+ that
  # responds to each_line (e.g. a String); header names are not yielded.
  def Ytilib.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line do |line|
      if line[0,1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    end
    seqs
  end

  # Returns the stripped, non-blank lines of the file at +path+.
  def Ytilib.read_plain2array(path)
    array = []
    File.foreach(path) do |line|
      stripped = line.strip
      array << stripped unless stripped.empty?
    end
    array
  end

  # Reads sequences from +path+, choosing the reader by File.ext_wo_name(path):
  # "mfa"/"fasta"/"fa" -> read_mfa2array, "plain"/"txt" -> read_plain2array.
  # Any other extension goes through checkerr, which raises.
  def Ytilib.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      return Ytilib.read_mfa2array(path)
    when "plain","txt"
      return Ytilib.read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ as multi-FASTA, overwriting the file.
  # For a Hash the header is ">#{prefix}#{name}"; for any other collection
  # it is ">#{prefix}#{1-based index}". Records are separated by $/.
  # Returns nil.
  def Ytilib.write_mfa(seqs, path, prefix = " ")
    File.open(path, "w+") do |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key { |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        }
      else
        seqs.each_with_index { |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        }
      end
    end
    nil
  end

  # Builds a consensus word for +seqs+: for every position of the first
  # sequence, the distinct letters seen across all sequences are sorted,
  # joined and looked up in the table below. Returns the string 'nil' for an
  # empty collection. A letter set missing from the table goes through
  # checkerr, which raises.
  # Emits a report warning on every call: the method is flagged as unsafe.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
      'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
      'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus, letters = '', []
    0.upto(seqs[0].size-1) { |i|
      seqs.each do |word|
        letters << word[i] if !letters.include?(word[i])
      end
      letters.sort!
      letters_string = ''
      letters.each do |letter| letters_string << letter end
      # Fix: the message used to end in an empty interpolation.
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { conslet[letters_string] == nil }
      new_consensus << conslet[letters_string]
      letters.clear
    }
    return new_consensus
  end

  # Opens a MySQL connection to +database+ using the MYSQL_HOST, MYSQL_USER
  # and MYSQL_PASSWORD constants (defined elsewhere) and verifies it with
  # "select 1"; a wrong answer goes through checkerr, which raises.
  def Ytilib.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    return my
  end


end
145
+
146
+ #report "ytilib required, working directory #{Dir.pwd}", "ytilib"
147
147
  include Ytilib