sequence_logo 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby
1
+ #!/usr/bin/ruby
2
2
  module Ytilib
3
3
  class PM
4
4
  def score_sigma(trycount = 4**10, approx = false, bg = nil)
@@ -0,0 +1,85 @@
1
class Object
  # Returns a copy of the receiver that shares no nested objects with it.
  #
  # The copy is produced by a Marshal round trip, so the receiver (and
  # everything reachable from it) must be something Marshal.dump accepts.
  def deep_dup
    serialized = Marshal.dump(self)
    Marshal.load(serialized)
  end
end
6
+
7
# Loads a motif from +in_file_name+ and returns the value of +get_ppm+ on it.
#
# The loader is picked from the value of File.ext_wo_name(in_file_name):
#   'pat', 'pcm'           -> PM.load (followed by fixwc when words_count is set)
#   'mfa', 'fasta', 'plain' -> PM.new_pcm over Ytilib.read_seqs2array
#   'xml'                   -> PM.from_bismark on the "//PPM" element
#   equal to the argument   -> PPM.from_IUPAC on the upcased argument
#
# Returns nil whenever a StandardError is raised along the way; that includes
# the case where no branch matches, because +get_ppm+ is then called on nil.
def get_ppm_from_file(in_file_name)
  extension = File.ext_wo_name(in_file_name)

  motif =
    case extension
    when 'pat', 'pcm'
      loaded = PM.load(in_file_name)
      loaded.fixwc if loaded.words_count
      loaded
    when 'mfa', 'fasta', 'plain'
      PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
    when 'xml'
      PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
    when in_file_name
      # NOTE(review): this only matches when the extension helper hands back
      # the argument itself - presumably a bare IUPAC word rather than a
      # path; confirm against File.ext_wo_name.
      PPM.from_IUPAC(in_file_name.upcase)
    end

  motif.get_ppm
rescue
  nil
end
23
+
24
class PPM
  # Free-form label attached to the matrix.
  attr_accessor :name

  # Returns the receiver itself, so a PPM can be used wherever an object
  # responding to +get_ppm+ is expected.
  def get_ppm
    self
  end

  # Returns the absolute relative distance of +v+ from +icd4of4+:
  # |(v - icd4of4) / icd4of4|.
  #
  # NOTE(review): there is no zero guard here, unlike get_logo_discrete which
  # falls back to 1.0 when icd4of4 is zero - confirm whether callers can hit
  # that case.
  def get_line(v)
    ((v - icd4of4) / icd4of4).abs
  end

  # Dispatches to the logo builder named by +icd_mode+ ('weblogo' or
  # 'discrete'; symbols are accepted because the mode is stringified).
  # Returns nil for any other mode.
  def get_logo(icd_mode)
    case icd_mode.to_s
    when 'weblogo'  then get_logo_weblogo
    when 'discrete' then get_logo_discrete
    end
  end

  # Builds letter heights on a 0..1 scale from per-column information content.
  #
  # For each column the information is 2 + sum(p * log2(p)) over A, C, G, T
  # (zero probabilities contribute nothing); each probability is then
  # multiplied by that value and halved so the 2-bit maximum maps to 1.
  #
  # Returns a Hash {'A'=>[...], 'C'=>[...], 'G'=>[...], 'T'=>[...]}.
  def get_logo_weblogo
    column_info = @matrix['A'].each_index.map do |i|
      2 + %w[A C G T].inject(0) do |sum, letter|
        pn = @matrix[letter][i]
        pn == 0 ? sum : sum + pn * Math.log2(pn)
      end
    end

    scale_logo_columns { |prob, i| prob * column_info[i] / 2 } # so we can handle a '2 bit' scale here
  end

  # Builds letter heights by scaling every column with
  # |(infocod(i) - icd4of4) / icd4of4|, or with 1.0 when icd4of4 is zero.
  #
  # Raises (through checkerr) when words_count is not set.
  # Returns a Hash {'A'=>[...], 'C'=>[...], 'G'=>[...], 'T'=>[...]}.
  def get_logo_discrete
    checkerr("words count is undefined") { !words_count }

    baseline = icd4of4 # same for every column, so look it up once
    column_scale = @matrix['A'].each_index.map do |i|
      baseline == 0 ? 1.0 : ((infocod(i) - baseline) / baseline).abs
    end

    scale_logo_columns { |prob, i| prob * column_scale[i] }
  end

  # Returns a reverse-complemented deep copy; the receiver is left untouched.
  def revcomp
    deep_dup.revcomp!
  end

  private

  # Yields (probability, column index) for every cell of the matrix and
  # collects the block results into a fresh letter => column-array Hash.
  # Column indices are taken from the 'A' row.
  def scale_logo_columns
    %w[A C G T].each_with_object({}) do |letter, scaled|
      scaled[letter] = @matrix['A'].each_index.map { |i| yield(@matrix[letter][i], i) }
    end
  end
end
@@ -1,131 +1,131 @@
1
- #!/usr/bin/ruby
2
- module Ytilib
3
-
4
- srand
5
-
6
- module Randoom
7
-
8
- private
9
-
10
- def Randoom.new_counts
11
- { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
12
- end
13
-
14
- def Randoom.random_letter(probs)
15
- random = rand()
16
- return 'A' if random < probs['A']
17
- return 'C' if random < probs['A'] + probs['C']
18
- return 'G' if random < probs['A'] + probs['C'] + probs['G']
19
- return 'T'
20
- end
21
-
22
- public
23
-
24
- def Randoom.calc_probs(input)
25
- counts = new_counts
26
- counts.default = 0
27
- (0...input.length).each { |i|
28
- counts[input[i,1].upcase] += 1
29
- }
30
- return make_probs!(counts)
31
- end
32
-
33
- def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
34
- randoom = ''
35
- if (probs_m == nil)
36
- req_len.times { randoom << random_letter(probs) }
37
- return randoom
38
- end
39
- random_l = random_letter(probs)
40
- randoom = random_l
41
- (req_len-1).times {
42
- cur_probs = probs_m[random_l]
43
- random_l = random_letter(cur_probs)
44
- randoom << random_l
45
- }
46
- return randoom
47
- end
48
-
49
- def Randoom.calc_probs_m(input)
50
- probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
51
- counts = { 'A' => new_counts, 'C' => new_counts, 'G' => new_counts, 'T' => new_counts, 'N' => new_counts }
52
- (0...input.length-1).each { |i|
53
- pair = input[i, 2].upcase
54
- counts[pair[0,1]][pair[1,1]] += 1
55
- }
56
- probs_m['A'] = make_probs!(counts['A'])
57
- probs_m['C'] = make_probs!(counts['C'])
58
- probs_m['G'] = make_probs!(counts['G'])
59
- probs_m['T'] = make_probs!(counts['T'])
60
- return probs_m
61
- end
62
-
63
- def Randoom.make_probs_m!(counts)
64
- ['A','C','G','T','N'].each { |l2|
65
- addv = counts['N'][l2] / 4.0
66
- ['A','C','G','T'].each { |l1|
67
- counts[l1][l2] += addv
68
- }
69
- }
70
-
71
- probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
72
- probs_m['A'] = make_probs!(counts['A'])
73
- probs_m['C'] = make_probs!(counts['C'])
74
- probs_m['G'] = make_probs!(counts['G'])
75
- probs_m['T'] = make_probs!(counts['T'])
76
- return probs_m
77
- end
78
-
79
- def Randoom.make_probs!(counts, length = nil)
80
- probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
81
- length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length == nil
82
- length = length.to_f
83
- ['A','C','G','T'].each { |l| counts[l] += counts['N'] / 4.0 }
84
- return probs if length == 0
85
- probs['A'] = counts['A'] / length
86
- probs['C'] = counts['C'] / length
87
- probs['G'] = counts['G'] / length
88
- probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
89
- return probs
90
- end
91
-
92
- def Randoom.equalize!(probs)
93
- probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
94
- probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
95
- return probs
96
- end
97
-
98
- def Randoom.twostrand!(probs)
99
- return Randoom.equalize!(probs)
100
- end
101
-
102
- DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })
103
-
104
- # probabilities counted without _random.fa files for human genome
105
- DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
106
- DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}
107
-
108
- DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
109
- "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
110
- "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
111
- "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}
112
-
113
- HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
114
- HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}
115
-
116
- HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
117
- "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
118
- "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
119
- "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}
120
-
121
- HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
122
- HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}
123
-
124
- MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
125
- MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}
126
-
127
- MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}
128
-
129
- end
130
-
131
- end
1
+ #!/usr/bin/ruby
2
module Ytilib

  srand

  # Nucleotide frequency estimation and random DNA sequence generation.
  #
  # Every method here is a singleton method of the module. Visibility keywords
  # (private/public) do not apply to methods defined as `def self.x`, so all
  # of them are callable as Randoom.x.
  module Randoom

    # Returns a fresh zeroed counter for the letters A, C, G, T and N.
    def self.new_counts
      { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
    end

    # Draws one letter according to +probs+ (a Hash with 'A', 'C', 'G' keys;
    # whatever probability mass is left goes to 'T').
    def self.random_letter(probs)
      random = rand
      return 'A' if random < probs['A']
      return 'C' if random < probs['A'] + probs['C']
      return 'G' if random < probs['A'] + probs['C'] + probs['G']
      'T'
    end

    # Returns single-letter probabilities estimated from the String +input+
    # (case-insensitive). Characters other than A/C/G/T/N are tallied under
    # their own key and therefore do not enter the result.
    def self.calc_probs(input)
      counts = new_counts
      counts.default = 0
      (0...input.length).each { |i| counts[input[i, 1].upcase] += 1 }
      make_probs!(counts)
    end

    # Generates a random sequence of +req_len+ letters.
    #
    # probs   - single-letter probabilities used for every letter, or only for
    #           the first one when +probs_m+ is given.
    # probs_m - optional Hash of per-previous-letter probabilities
    #           (probs_m[prev][next]); when present each letter is drawn
    #           conditionally on the one before it.
    #
    # Returns an empty String when +req_len+ is zero or negative.
    def self.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
      randoom = ''
      return randoom if req_len < 1

      if probs_m.nil?
        req_len.times { randoom << random_letter(probs) }
        return randoom
      end

      random_l = random_letter(probs)
      randoom << random_l
      (req_len - 1).times do
        random_l = random_letter(probs_m[random_l])
        randoom << random_l
      end
      randoom
    end

    # Returns first-order transition probabilities estimated from the
    # overlapping letter pairs of +input+ (case-insensitive), as
    # { 'A' => probs, 'C' => probs, 'G' => probs, 'T' => probs }.
    #
    # Pairs containing a character other than A/C/G/T/N are skipped. Pairs
    # that start with N are counted but not redistributed here.
    def self.calc_probs_m(input)
      counts = { 'A' => new_counts, 'C' => new_counts, 'G' => new_counts, 'T' => new_counts, 'N' => new_counts }
      (0...input.length - 1).each do |i|
        pair = input[i, 2].upcase
        row = counts[pair[0, 1]]
        next unless row && row.key?(pair[1, 1])
        row[pair[1, 1]] += 1
      end
      %w[A C G T].each_with_object({}) { |l, probs_m| probs_m[l] = make_probs!(counts[l]) }
    end

    # Converts a table of pair counts (counts[first][second], including an
    # 'N' row) into transition probabilities. A quarter of every count in the
    # 'N' row is first added to the matching cell of each A/C/G/T row.
    # Mutates +counts+.
    def self.make_probs_m!(counts)
      %w[A C G T N].each do |l2|
        addv = counts['N'][l2] / 4.0
        %w[A C G T].each { |l1| counts[l1][l2] += addv }
      end
      %w[A C G T].each_with_object({}) { |l, probs_m| probs_m[l] = make_probs!(counts[l]) }
    end

    # Converts letter counts into probabilities.
    #
    # counts - Hash with 'A', 'C', 'G', 'T', 'N' counts; it is mutated: a
    #          quarter of the N count is added to each of the other four.
    # length - divisor to use; defaults to the sum of the five counts as they
    #          were on entry.
    #
    # Returns a Hash with 'A', 'C', 'G', 'T'; T is computed as one minus the
    # other three. All four are zero when the divisor is zero.
    def self.make_probs!(counts, length = nil)
      probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
      length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length.nil?
      length = length.to_f
      %w[A C G T].each { |l| counts[l] += counts['N'] / 4.0 }
      return probs if length == 0
      probs['A'] = counts['A'] / length
      probs['C'] = counts['C'] / length
      probs['G'] = counts['G'] / length
      probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
      probs
    end

    # Averages A with T and C with G in place and returns +probs+.
    def self.equalize!(probs)
      probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
      probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
      probs
    end

    # Alias-style wrapper around equalize!.
    def self.twostrand!(probs)
      equalize!(probs)
    end

    DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

    # probabilities counted without _random.fa files for human genome
    DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
    DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}

    DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
    "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
    "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
    "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}

    HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
    HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}

    HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
    "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
    "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
    "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}

    HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
    HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}

    MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
    MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}

    MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}

  end

end
@@ -1,147 +1,147 @@
1
- module Ytilib
2
- def Ytilib.time
3
- return Time.now.strftime('%d %b %H:%M:%S')
4
- end
5
- end
6
-
7
- $program_name = nil
8
-
9
- def start(fullpath)
10
- report(fullpath + ARGV.inject("") { |out, v| out += " " + v})
11
- return if $NO_REPORT
12
- $program_name = "[#{File.name_wo_ext(fullpath)}]"
13
- end
14
-
15
- def report(message, program_name = nil)
16
- $program_name = "[#{program_name}]" if program_name != nil
17
- return if $NO_REPORT
18
- puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if !block_given? || yield
19
- end
20
-
21
- def checkerr(message = "checkerr failed")
22
- if !block_given? || yield
23
- puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
24
- raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
25
- end
26
- end
27
-
28
- module Ytilib
29
-
30
- STRAND_DIRECT = "direct"
31
- STRAND_REVCOMP = "revcomp"
32
-
33
- def Ytilib.read_mfa2hash(path)
34
- input_fasta_f = File.new(path, "r")
35
- seqs, seq_name = {}, nil
36
- input_fasta_f.each_line { |line|
37
- if line[0,1] == ">"
38
- seq_name = line[1..-1].strip
39
- seq_name = yield seq_name if block_given?
40
- checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
41
- seqs[seq_name] = ""
42
- elsif seq_name != nil
43
- seqs[seq_name] << line.strip
44
- end
45
- }
46
- input_fasta_f.close
47
- return seqs
48
- end
49
-
50
- def Ytilib.read_mfa2array(path)
51
- input_fasta_f = File.new(path, "r")
52
- seqs, seq_name = [], nil
53
- input_fasta_f.each_line { |line|
54
- if line[0,1] == ">"
55
- seq_name = line[1..-1].strip
56
- yield seq_name if block_given?
57
- seqs << ""
58
- elsif seq_name != nil
59
- seqs.last << line.strip
60
- end
61
- }
62
- input_fasta_f.close
63
- return seqs
64
- end
65
-
66
- def Ytilib.mfa2array(input)
67
- seqs, seq_name = [], nil
68
- input.each_line { |line|
69
- if line[0,1] == ">"
70
- seq_name = line[1..-1].strip
71
- seqs << ""
72
- elsif seq_name != nil
73
- seqs.last << line.strip
74
- end
75
- }
76
- return seqs
77
- end
78
-
79
- def Ytilib.read_plain2array(path)
80
- array = []
81
- File.open(path).each_line { |line|
82
- array << line.strip if !line.strip.empty?
83
- }
84
- return array
85
- end
86
-
87
- def Ytilib.read_seqs2array(path)
88
- type = File.ext_wo_name(path)
89
- case type
90
- when "mfa", "fasta", "fa"
91
- return Ytilib.read_mfa2array(path)
92
- when "plain","txt"
93
- return Ytilib.read_plain2array(path)
94
- else
95
- checkerr("unknown sequences-file, ext=#{type}")
96
- end
97
- end
98
-
99
- def Ytilib.write_mfa(seqs, path, prefix = " ")
100
- if seqs.is_a?(Hash)
101
- out_fasta_f = File.new(path, "w+")
102
- seqs.each_key { |name|
103
- out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
104
- }
105
- out_fasta_f.close
106
- else
107
- out_fasta_f = File.new(path, "w+")
108
- seqs.each_with_index { |seq, i|
109
- out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
110
- }
111
- out_fasta_f.close
112
- end
113
- end
114
-
115
- def get_consensus(seqs)
116
- report "consensus creating method should be checked, you are using unsafe code"
117
- return 'nil' if seqs.size == 0
118
- conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
119
- 'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
120
- 'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
121
- }
122
- new_consensus, letters = '', []
123
- 0.upto(seqs[0].size-1) { |i|
124
- seqs.each do |word|
125
- letters << word[i] if !letters.include?(word[i])
126
- end
127
- letters.sort!
128
- letters_string = ''
129
- letters.each do |letter| letters_string << letter end
130
- checkerr("cannot find consensus letter for a given letter set :#{}") { conslet[letters_string] == nil }
131
- new_consensus << conslet[letters_string]
132
- letters.clear
133
- }
134
- return new_consensus
135
- end
136
-
137
- def Ytilib.new_mysql_conn(database)
138
- my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
139
- checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
140
- return my
141
- end
142
-
143
-
144
- end
145
-
146
- report "ytilib required, working directory #{Dir.pwd}", "ytilib"
1
module Ytilib
  # Returns the current local time as "DD Mon HH:MM:SS" (for example
  # "07 Mar 14:02:59"), the stamp used in the LLIB log lines.
  def self.time
    Time.now.strftime('%d %b %H:%M:%S')
  end
end
6
+
7
+ $program_name = nil
8
+
9
# Announces a program start and records its name for later log lines.
#
# Reports +fullpath+ followed by the command-line arguments (space
# separated). Unless $NO_REPORT is set, $program_name is then set to
# "[name]", where name is File.name_wo_ext(fullpath). The announcement is
# reported before $program_name changes, so it carries the previous value.
def start(fullpath)
  report([fullpath, *ARGV].join(" "))
  return if $NO_REPORT
  $program_name = "[#{File.name_wo_ext(fullpath)}]"
end
14
+
15
# Prints a time-stamped "LLIB" log line for +message+.
#
# program_name - when given, replaces $program_name with "[program_name]"
#                before anything else happens (even if reporting is off).
#
# Nothing is printed when $NO_REPORT is set. If a block is given it is called
# (only when reporting is on) and the line is printed only for a truthy result.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return if $NO_REPORT

  wanted = !block_given? || yield
  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if wanted
end
20
+
21
# Raises a RuntimeError carrying +message+ when called without a block, or
# when the given block returns a truthy value; otherwise returns nil.
#
# Before raising, an "[error]" log line is printed unless $NO_REPORT is set.
def checkerr(message = "checkerr failed")
  failed = !block_given? || yield
  return unless failed

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
27
+
28
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a Hash of name => sequence.
  #
  # A line starting with ">" opens a new record named by the rest of the line
  # (stripped); following lines are stripped and appended to that record.
  # Lines before the first header are ignored. When a block is given, each
  # name is passed through it and the block's result is used as the key.
  #
  # Raises (through checkerr) when two records end up with the same name.
  def self.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    # File.foreach closes the file even when checkerr raises mid-way.
    File.foreach(path) do |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        seq_name = yield seq_name if block_given?
        checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
        seqs[seq_name] = ""
      elsif seq_name != nil
        seqs[seq_name] << line.strip
      end
    end
    seqs
  end

  # Reads a multi-FASTA file into an Array of sequences, in file order.
  # When a block is given, every record name is yielded to it (the result is
  # ignored). Lines before the first header are ignored.
  def self.read_mfa2array(path)
    seqs, seq_name = [], nil
    File.foreach(path) do |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        yield seq_name if block_given?
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    end
    seqs
  end

  # Same parsing as read_mfa2array, but over multi-FASTA text already held in
  # +input+ (anything responding to each_line).
  def self.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line do |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    end
    seqs
  end

  # Returns the stripped, non-blank lines of the file at +path+.
  def self.read_plain2array(path)
    File.foreach(path).map { |line| line.strip }.reject { |line| line.empty? }
  end

  # Reads sequences from +path+, choosing the parser from the value of
  # File.ext_wo_name(path): "mfa"/"fasta"/"fa" are read as multi-FASTA,
  # "plain"/"txt" as one sequence per line. Any other value goes to checkerr.
  def self.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      read_mfa2array(path)
    when "plain", "txt"
      read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ as multi-FASTA, overwriting the file.
  #
  # seqs   - a Hash (name => sequence) or any collection responding to
  #          each_with_index, whose records are named 1, 2, 3, ...
  # prefix - text placed between ">" and the record name.
  #
  # Returns nil.
  def self.write_mfa(seqs, path, prefix = " ")
    File.open(path, "w+") do |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key do |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        end
      else
        seqs.each_with_index do |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        end
      end
    end
    nil
  end

  # Builds an IUPAC consensus word for +seqs+: for every position of the
  # first sequence, the distinct letters found at that position across all
  # sequences are sorted, joined and looked up in the IUPAC table.
  #
  # Returns the String 'nil' for an empty collection. Raises (through
  # checkerr) when a letter set has no IUPAC code. Emits a report line on
  # every call warning that the method is considered unsafe.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
      'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
      'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus = ''
    0.upto(seqs[0].size - 1) do |i|
      letters = []
      seqs.each do |word|
        letters << word[i] if !letters.include?(word[i])
      end
      letters.sort!
      letters_string = ''
      letters.each do |letter| letters_string << letter end
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { conslet[letters_string] == nil }
      new_consensus << conslet[letters_string]
    end
    new_consensus
  end

  # Opens a MySQL connection to +database+ using the MYSQL_HOST, MYSQL_USER
  # and MYSQL_PASSWORD constants and verifies it with "select 1".
  # Raises (through checkerr) when the probe does not return "1".
  def self.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    my
  end


end
145
+
146
+ #report "ytilib required, working directory #{Dir.pwd}", "ytilib"
147
147
  include Ytilib