sequence_logo 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -60
- data/Rakefile +5 -5
- data/TODO.txt +8 -0
- data/bin/glue_logos +3 -0
- data/bin/sequence_logo +3 -0
- data/lib/sequence_logo.rb +7 -7
- data/lib/sequence_logo/cli.rb +37 -0
- data/lib/sequence_logo/exec/glue_logos.rb +67 -0
- data/lib/sequence_logo/exec/sequence_logo.rb +52 -0
- data/lib/sequence_logo/pmflogo_lib.rb +95 -174
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib.rb +10 -9
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +1 -1
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -0
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/sequence_logo.gemspec +21 -21
- data/test/data/logo/AHR_si_direct.png +0 -0
- data/test/data/logo/AHR_si_revcomp.png +0 -0
- data/test/data/logo/AIRE_f2_direct.png +0 -0
- data/test/data/logo/AIRE_f2_revcomp.png +0 -0
- data/test/data/pcm/AHR_si.pcm +10 -0
- data/test/data/pcm/AIRE_f2.pcm +19 -0
- metadata +33 -32
- data/bin/create_all_logos +0 -3
- data/bin/generate_logo +0 -3
- data/bin/pmflogo +0 -3
- data/lib/sequence_logo/exec/create_all_logos.rb +0 -25
- data/lib/sequence_logo/exec/generate_logo.rb +0 -18
- data/lib/sequence_logo/exec/pmflogo.rb +0 -26
@@ -0,0 +1,85 @@
|
|
1
|
+
class Object
  # Returns a deep copy of the receiver by serializing it with Marshal and
  # loading the result back.
  #
  # Because the copy goes through Marshal.dump, the receiver (and everything
  # it references) must be marshalable; Marshal.dump raises TypeError otherwise.
  def deep_dup
    Marshal.load(Marshal.dump(self))
  end
end
|
6
|
+
|
7
|
+
# Builds a matrix from +in_file_name+ and returns the result of calling
# +get_ppm+ on it.
#
# The loader is chosen from the value of File.ext_wo_name(in_file_name):
#   'pat', 'pcm'            -> PM.load (followed by fixwc when words_count is set)
#   'mfa', 'fasta', 'plain' -> PM.new_pcm over Ytilib.read_seqs2array
#   'xml'                   -> PM.from_bismark over the "//PPM" element
#   equal to in_file_name   -> PPM.from_IUPAC(in_file_name.upcase)
# NOTE(review): the last branch presumably covers an argument that is an IUPAC
# word rather than a path (File.ext_wo_name returning its input unchanged) --
# confirm against File.ext_wo_name.
#
# Returns nil when no branch matches and also when any StandardError is raised
# while loading; callers cannot tell the two cases apart.
def get_ppm_from_file(in_file_name)
  pm =
    case File.ext_wo_name(in_file_name)
    when 'pat', 'pcm'
      loaded = PM.load(in_file_name)
      loaded.fixwc if loaded.words_count
      loaded
    when 'mfa', 'fasta', 'plain'
      PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
    when 'xml'
      PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
    when in_file_name
      PPM.from_IUPAC(in_file_name.upcase)
    end

  # No branch matched: report "nothing loaded" explicitly instead of relying
  # on a rescued NoMethodError from calling get_ppm on nil.
  return nil unless pm

  pm.get_ppm
rescue StandardError
  # Deliberate best-effort: any loading failure is reported as nil.
  nil
end
|
23
|
+
|
24
|
+
# Logo-related additions to PPM. This block only adds methods; it relies on
# @matrix (a Hash with 'A', 'C', 'G', 'T' rows), icd4of4, infocod, words_count
# and revcomp! being provided by the rest of the class, and on the global
# checkerr helper.
class PPM
  attr_accessor :name

  # Returns the receiver itself.
  def get_ppm
    self
  end

  # Returns |(v - icd4of4) / icd4of4|, i.e. +v+ on the same relative scale
  # that get_logo_discrete uses for column heights.
  # When icd4of4 is zero the scale is undefined; 1.0 is returned, matching the
  # fallback get_logo_discrete applies in that case.
  def get_line(v)
    return 1.0 if icd4of4 == 0

    ((v - icd4of4) / icd4of4).abs
  end

  # Dispatches on icd_mode.to_s: 'weblogo' -> get_logo_weblogo,
  # 'discrete' -> get_logo_discrete. Any other mode yields nil.
  def get_logo(icd_mode)
    case icd_mode.to_s
    when 'weblogo'
      get_logo_weblogo
    when 'discrete'
      get_logo_discrete
    end
  end

  # Returns a Hash letter => Array with every matrix cell scaled by the
  # column's information content, 2 + sum(p * log2(p)) over A, C, G, T
  # (cells equal to zero contribute nothing), divided by 2.
  def get_logo_weblogo
    letters = ['A', 'C', 'G', 'T']

    rseq = @matrix['A'].each_index.map { |i|
      2 + letters.inject(0) { |sum, l|
        pn = @matrix[l][i]
        sum + (pn == 0 ? 0 : pn * Math.log2(pn))
      }
    }

    mat = {}
    letters.each { |l|
      # so we can handle a '2 bit' scale here
      mat[l] = @matrix['A'].each_index.map { |i| @matrix[l][i] * rseq[i] / 2 }
    }
    mat
  end

  # Returns a Hash letter => Array with every matrix cell scaled by
  # |(infocod(i) - icd4of4) / icd4of4| for its column i (1.0 when icd4of4 is
  # zero). Raises through checkerr when words_count is not set.
  def get_logo_discrete
    checkerr("words count is undefined") { !words_count }

    rseq = @matrix['A'].each_index.map { |i|
      icd4of4 == 0 ? 1.0 : ((infocod(i) - icd4of4) / icd4of4).abs
    }

    mat = {}
    ['A', 'C', 'G', 'T'].each { |l|
      mat[l] = @matrix['A'].each_index.map { |i| @matrix[l][i] * rseq[i] }
    }
    mat
  end

  # Returns a reverse-complemented deep copy; the receiver is left untouched
  # because revcomp! is applied to the result of deep_dup.
  def revcomp
    deep_dup.revcomp!
  end
end
|
@@ -1,131 +1,131 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
module Ytilib
|
3
|
-
|
4
|
-
srand
|
5
|
-
|
6
|
-
module Randoom
|
7
|
-
|
8
|
-
private
|
9
|
-
|
10
|
-
def Randoom.new_counts
|
11
|
-
{ 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
|
12
|
-
end
|
13
|
-
|
14
|
-
def Randoom.random_letter(probs)
|
15
|
-
random = rand()
|
16
|
-
return 'A' if random < probs['A']
|
17
|
-
return 'C' if random < probs['A'] + probs['C']
|
18
|
-
return 'G' if random < probs['A'] + probs['C'] + probs['G']
|
19
|
-
return 'T'
|
20
|
-
end
|
21
|
-
|
22
|
-
public
|
23
|
-
|
24
|
-
def Randoom.calc_probs(input)
|
25
|
-
counts = new_counts
|
26
|
-
counts.default = 0
|
27
|
-
(0...input.length).each { |i|
|
28
|
-
counts[input[i,1].upcase] += 1
|
29
|
-
}
|
30
|
-
return make_probs!(counts)
|
31
|
-
end
|
32
|
-
|
33
|
-
def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
|
34
|
-
randoom = ''
|
35
|
-
if (probs_m == nil)
|
36
|
-
req_len.times { randoom << random_letter(probs) }
|
37
|
-
return randoom
|
38
|
-
end
|
39
|
-
random_l = random_letter(probs)
|
40
|
-
randoom = random_l
|
41
|
-
(req_len-1).times {
|
42
|
-
cur_probs = probs_m[random_l]
|
43
|
-
random_l = random_letter(cur_probs)
|
44
|
-
randoom << random_l
|
45
|
-
}
|
46
|
-
return randoom
|
47
|
-
end
|
48
|
-
|
49
|
-
def Randoom.calc_probs_m(input)
|
50
|
-
probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
|
51
|
-
counts = { 'A' => new_counts, 'C' => new_counts, 'G' => new_counts, 'T' => new_counts, 'N' => new_counts }
|
52
|
-
(0...input.length-1).each { |i|
|
53
|
-
pair = input[i, 2].upcase
|
54
|
-
counts[pair[0,1]][pair[1,1]] += 1
|
55
|
-
}
|
56
|
-
probs_m['A'] = make_probs!(counts['A'])
|
57
|
-
probs_m['C'] = make_probs!(counts['C'])
|
58
|
-
probs_m['G'] = make_probs!(counts['G'])
|
59
|
-
probs_m['T'] = make_probs!(counts['T'])
|
60
|
-
return probs_m
|
61
|
-
end
|
62
|
-
|
63
|
-
def Randoom.make_probs_m!(counts)
|
64
|
-
['A','C','G','T','N'].each { |l2|
|
65
|
-
addv = counts['N'][l2] / 4.0
|
66
|
-
['A','C','G','T'].each { |l1|
|
67
|
-
counts[l1][l2] += addv
|
68
|
-
}
|
69
|
-
}
|
70
|
-
|
71
|
-
probs_m = { 'A' => {}, 'C' => {}, 'G' => {}, 'T' => {} }
|
72
|
-
probs_m['A'] = make_probs!(counts['A'])
|
73
|
-
probs_m['C'] = make_probs!(counts['C'])
|
74
|
-
probs_m['G'] = make_probs!(counts['G'])
|
75
|
-
probs_m['T'] = make_probs!(counts['T'])
|
76
|
-
return probs_m
|
77
|
-
end
|
78
|
-
|
79
|
-
def Randoom.make_probs!(counts, length = nil)
|
80
|
-
probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
|
81
|
-
length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length == nil
|
82
|
-
length = length.to_f
|
83
|
-
['A','C','G','T'].each { |l| counts[l] += counts['N'] / 4.0 }
|
84
|
-
return probs if length == 0
|
85
|
-
probs['A'] = counts['A'] / length
|
86
|
-
probs['C'] = counts['C'] / length
|
87
|
-
probs['G'] = counts['G'] / length
|
88
|
-
probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
|
89
|
-
return probs
|
90
|
-
end
|
91
|
-
|
92
|
-
def Randoom.equalize!(probs)
|
93
|
-
probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
|
94
|
-
probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
|
95
|
-
return probs
|
96
|
-
end
|
97
|
-
|
98
|
-
def Randoom.twostrand!(probs)
|
99
|
-
return Randoom.equalize!(probs)
|
100
|
-
end
|
101
|
-
|
102
|
-
DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })
|
103
|
-
|
104
|
-
# probabilities counted without _random.fa files for human genome
|
105
|
-
DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
|
106
|
-
DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}
|
107
|
-
|
108
|
-
DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
|
109
|
-
"C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
|
110
|
-
"G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
|
111
|
-
"T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}
|
112
|
-
|
113
|
-
HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
|
114
|
-
HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}
|
115
|
-
|
116
|
-
HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
|
117
|
-
"C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
|
118
|
-
"G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
|
119
|
-
"T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}
|
120
|
-
|
121
|
-
HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
|
122
|
-
HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}
|
123
|
-
|
124
|
-
MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
|
125
|
-
MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}
|
126
|
-
|
127
|
-
MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}
|
128
|
-
|
129
|
-
end
|
130
|
-
|
131
|
-
end
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
module Ytilib

# Seeds the global random number generator when this file is loaded.
srand

# Helpers for estimating letter frequencies of DNA strings and for generating
# random sequences from them (independent letters or a first-order chain).
#
# All methods are defined as `def Randoom.name`, i.e. as singleton methods of
# the module. Visibility keywords (`private` / `public`) do not apply to such
# definitions, so every method below is publicly callable.
module Randoom

  # Returns a fresh zeroed counter Hash for the letters A, C, G, T and N.
  def Randoom.new_counts
    { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
  end

  # Draws one letter using the cumulative probabilities in +probs+
  # (keys 'A', 'C', 'G'); 'T' is returned when the draw falls past all three.
  def Randoom.random_letter(probs)
    random = rand()
    return 'A' if random < probs['A']
    return 'C' if random < probs['A'] + probs['C']
    return 'G' if random < probs['A'] + probs['C'] + probs['G']
    'T'
  end

  # Counts the (upcased) letters of +input+ and converts the counts into
  # probabilities with make_probs!. Letters other than A, C, G, T, N are
  # counted under their own key and therefore do not influence the result.
  def Randoom.calc_probs(input)
    counts = new_counts
    counts.default = 0
    (0...input.length).each { |i|
      counts[input[i, 1].upcase] += 1
    }
    make_probs!(counts)
  end

  # Returns a random String of +req_len+ letters.
  #
  # Without +probs_m+ every letter is drawn independently from +probs+.
  # With +probs_m+ (Hash: previous letter => probability Hash) the first
  # letter is drawn from +probs+ and each following letter from the row of
  # +probs_m+ selected by the letter before it.
  #
  # A non-positive +req_len+ yields an empty String in both modes.
  def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
    randoom = ''
    return randoom if req_len <= 0

    if probs_m.nil?
      req_len.times { randoom << random_letter(probs) }
      return randoom
    end

    random_l = random_letter(probs)
    randoom << random_l
    (req_len - 1).times {
      random_l = random_letter(probs_m[random_l])
      randoom << random_l
    }
    randoom
  end

  # Counts adjacent letter pairs of +input+ (upcased) and returns, for each
  # first letter A, C, G, T, the probabilities of the following letter as
  # computed by make_probs!.
  #
  # Pairs whose first letter is outside A, C, G, T, N are skipped and unknown
  # second letters are counted under their own key, so such characters are
  # ignored in the same way calc_probs ignores them.
  # NOTE(review): pairs starting with 'N' are counted but that row is not used
  # here (make_probs_m! is the method that redistributes it) -- confirm intent.
  def Randoom.calc_probs_m(input)
    counts = {}
    ['A', 'C', 'G', 'T', 'N'].each { |l|
      counts[l] = new_counts
      counts[l].default = 0
    }

    (0...input.length - 1).each { |i|
      pair = input[i, 2].upcase
      row = counts[pair[0, 1]]
      row[pair[1, 1]] += 1 if row
    }

    probs_m = {}
    ['A', 'C', 'G', 'T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
    probs_m
  end

  # Converts a table of pair counts (first letter => counter Hash) into
  # per-first-letter probabilities. A quarter of every count in the 'N' row is
  # first added to the A, C, G and T rows. Modifies +counts+ in place.
  def Randoom.make_probs_m!(counts)
    ['A', 'C', 'G', 'T', 'N'].each { |l2|
      addv = counts['N'][l2] / 4.0
      ['A', 'C', 'G', 'T'].each { |l1|
        counts[l1][l2] += addv
      }
    }

    probs_m = {}
    ['A', 'C', 'G', 'T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
    probs_m
  end

  # Converts letter counts into probabilities for A, C, G, T.
  #
  # +length+ defaults to the sum of the A, C, G, T and N counts. A quarter of
  # the N count is added to each of the other four counters (modifying
  # +counts+ in place) before dividing. 'T' is computed as the remainder so
  # the four values add up to 1. All-zero probabilities are returned when
  # +length+ is zero.
  def Randoom.make_probs!(counts, length = nil)
    probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
    length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length.nil?
    length = length.to_f
    ['A', 'C', 'G', 'T'].each { |l| counts[l] += counts['N'] / 4.0 }
    return probs if length == 0
    probs['A'] = counts['A'] / length
    probs['C'] = counts['C'] / length
    probs['G'] = counts['G'] / length
    probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
    probs
  end

  # Replaces A and T by their mean and C and G by their mean, in place, and
  # returns +probs+.
  def Randoom.equalize!(probs)
    probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
    probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
    probs
  end

  # Alias-style wrapper around equalize!.
  def Randoom.twostrand!(probs)
    Randoom.equalize!(probs)
  end

  DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

  # probabilities counted without _random.fa files for human genome
  DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
  DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}

  DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
  "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
  "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
  "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}

  HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
  HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}

  HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
  "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
  "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
  "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}

  HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
  HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}

  MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
  MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}

  MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}

end

end
|
@@ -1,147 +1,147 @@
|
|
1
|
-
module Ytilib
|
2
|
-
def Ytilib.time
|
3
|
-
return Time.now.strftime('%d %b %H:%M:%S')
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
$program_name = nil
|
8
|
-
|
9
|
-
def start(fullpath)
|
10
|
-
report(fullpath + ARGV.inject("") { |out, v| out += " " + v})
|
11
|
-
return if $NO_REPORT
|
12
|
-
$program_name = "[#{File.name_wo_ext(fullpath)}]"
|
13
|
-
end
|
14
|
-
|
15
|
-
def report(message, program_name = nil)
|
16
|
-
$program_name = "[#{program_name}]" if program_name != nil
|
17
|
-
return if $NO_REPORT
|
18
|
-
puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if !block_given? || yield
|
19
|
-
end
|
20
|
-
|
21
|
-
def checkerr(message = "checkerr failed")
|
22
|
-
if !block_given? || yield
|
23
|
-
puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
|
24
|
-
raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
module Ytilib
|
29
|
-
|
30
|
-
STRAND_DIRECT = "direct"
|
31
|
-
STRAND_REVCOMP = "revcomp"
|
32
|
-
|
33
|
-
def Ytilib.read_mfa2hash(path)
|
34
|
-
input_fasta_f = File.new(path, "r")
|
35
|
-
seqs, seq_name = {}, nil
|
36
|
-
input_fasta_f.each_line { |line|
|
37
|
-
if line[0,1] == ">"
|
38
|
-
seq_name = line[1..-1].strip
|
39
|
-
seq_name = yield seq_name if block_given?
|
40
|
-
checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
|
41
|
-
seqs[seq_name] = ""
|
42
|
-
elsif seq_name != nil
|
43
|
-
seqs[seq_name] << line.strip
|
44
|
-
end
|
45
|
-
}
|
46
|
-
input_fasta_f.close
|
47
|
-
return seqs
|
48
|
-
end
|
49
|
-
|
50
|
-
def Ytilib.read_mfa2array(path)
|
51
|
-
input_fasta_f = File.new(path, "r")
|
52
|
-
seqs, seq_name = [], nil
|
53
|
-
input_fasta_f.each_line { |line|
|
54
|
-
if line[0,1] == ">"
|
55
|
-
seq_name = line[1..-1].strip
|
56
|
-
yield seq_name if block_given?
|
57
|
-
seqs << ""
|
58
|
-
elsif seq_name != nil
|
59
|
-
seqs.last << line.strip
|
60
|
-
end
|
61
|
-
}
|
62
|
-
input_fasta_f.close
|
63
|
-
return seqs
|
64
|
-
end
|
65
|
-
|
66
|
-
def Ytilib.mfa2array(input)
|
67
|
-
seqs, seq_name = [], nil
|
68
|
-
input.each_line { |line|
|
69
|
-
if line[0,1] == ">"
|
70
|
-
seq_name = line[1..-1].strip
|
71
|
-
seqs << ""
|
72
|
-
elsif seq_name != nil
|
73
|
-
seqs.last << line.strip
|
74
|
-
end
|
75
|
-
}
|
76
|
-
return seqs
|
77
|
-
end
|
78
|
-
|
79
|
-
def Ytilib.read_plain2array(path)
|
80
|
-
array = []
|
81
|
-
File.open(path).each_line { |line|
|
82
|
-
array << line.strip if !line.strip.empty?
|
83
|
-
}
|
84
|
-
return array
|
85
|
-
end
|
86
|
-
|
87
|
-
def Ytilib.read_seqs2array(path)
|
88
|
-
type = File.ext_wo_name(path)
|
89
|
-
case type
|
90
|
-
when "mfa", "fasta", "fa"
|
91
|
-
return Ytilib.read_mfa2array(path)
|
92
|
-
when "plain","txt"
|
93
|
-
return Ytilib.read_plain2array(path)
|
94
|
-
else
|
95
|
-
checkerr("unknown sequences-file, ext=#{type}")
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def Ytilib.write_mfa(seqs, path, prefix = " ")
|
100
|
-
if seqs.is_a?(Hash)
|
101
|
-
out_fasta_f = File.new(path, "w+")
|
102
|
-
seqs.each_key { |name|
|
103
|
-
out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
|
104
|
-
}
|
105
|
-
out_fasta_f.close
|
106
|
-
else
|
107
|
-
out_fasta_f = File.new(path, "w+")
|
108
|
-
seqs.each_with_index { |seq, i|
|
109
|
-
out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
|
110
|
-
}
|
111
|
-
out_fasta_f.close
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
def get_consensus(seqs)
|
116
|
-
report "consensus creating method should be checked, you are using unsafe code"
|
117
|
-
return 'nil' if seqs.size == 0
|
118
|
-
conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
|
119
|
-
'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
|
120
|
-
'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
|
121
|
-
}
|
122
|
-
new_consensus, letters = '', []
|
123
|
-
0.upto(seqs[0].size-1) { |i|
|
124
|
-
seqs.each do |word|
|
125
|
-
letters << word[i] if !letters.include?(word[i])
|
126
|
-
end
|
127
|
-
letters.sort!
|
128
|
-
letters_string = ''
|
129
|
-
letters.each do |letter| letters_string << letter end
|
130
|
-
checkerr("cannot find consensus letter for a given letter set :#{}") { conslet[letters_string] == nil }
|
131
|
-
new_consensus << conslet[letters_string]
|
132
|
-
letters.clear
|
133
|
-
}
|
134
|
-
return new_consensus
|
135
|
-
end
|
136
|
-
|
137
|
-
def Ytilib.new_mysql_conn(database)
|
138
|
-
my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
|
139
|
-
checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
|
140
|
-
return my
|
141
|
-
end
|
142
|
-
|
143
|
-
|
144
|
-
end
|
145
|
-
|
146
|
-
report "ytilib required, working directory #{Dir.pwd}", "ytilib"
|
1
|
+
module Ytilib
  # Returns the current local time formatted as "DD Mon HH:MM:SS"
  # (strftime pattern '%d %b %H:%M:%S'); used as the timestamp in log lines.
  def Ytilib.time
    Time.now.strftime('%d %b %H:%M:%S')
  end
end
|
6
|
+
|
7
|
+
$program_name = nil
|
8
|
+
|
9
|
+
# Announces a program start: reports +fullpath+ followed by the command-line
# arguments (each prefixed with a space), then sets $program_name to
# "[<File.name_wo_ext(fullpath)>]".
#
# When $NO_REPORT is set the method returns before touching $program_name.
def start(fullpath)
  report(fullpath + ARGV.map { |arg| " " + arg }.join)
  return if $NO_REPORT
  $program_name = "[#{File.name_wo_ext(fullpath)}]"
end
|
14
|
+
|
15
|
+
# Prints a log line "LLIB <time> <$program_name>\t<message>" to standard
# output.
#
# message      - text to print.
# program_name - when given, $program_name is first replaced by
#                "[<program_name>]" (this happens even if $NO_REPORT is set).
#
# Nothing is printed when $NO_REPORT is set. If a block is given, the line is
# printed only when the block returns a truthy value.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return if $NO_REPORT
  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if !block_given? || yield
end
|
20
|
+
|
21
|
+
# Error check helper: raises a RuntimeError carrying +message+ when called
# without a block, or when the given block returns a truthy value (the block
# therefore describes the *failure* condition).
#
# Before raising, an "[error]" line is printed to standard output unless
# $NO_REPORT is set. Returns nil when the block is falsy.
def checkerr(message = "checkerr failed")
  failed = !block_given? || yield
  return unless failed

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
|
27
|
+
|
28
|
+
# Sequence-file helpers (multi-FASTA / plain text), consensus building and a
# MySQL connection helper. File-reading and -writing methods use block forms
# so the underlying handle is closed even when an error is raised midway.
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a Hash name => sequence.
  #
  # A line starting with ">" begins a record; the rest of that line, stripped,
  # is its name. If a block is given, the name is replaced by the block's
  # return value. Following lines are stripped and concatenated. Lines before
  # the first header are ignored. Raises (through checkerr) when two records
  # end up with the same name.
  def Ytilib.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    File.foreach(path) { |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        seq_name = yield seq_name if block_given?
        checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
        seqs[seq_name] = ""
      elsif seq_name != nil
        seqs[seq_name] << line.strip
      end
    }
    seqs
  end

  # Reads a multi-FASTA file into an Array of sequences, in file order.
  # If a block is given it is called with each record name; its return value
  # is ignored. Lines before the first header are ignored.
  def Ytilib.read_mfa2array(path)
    seqs, seq_name = [], nil
    File.foreach(path) { |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        yield seq_name if block_given?
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    }
    seqs
  end

  # Same parsing as read_mfa2array, but over +input+, any object responding to
  # each_line (e.g. a String holding multi-FASTA text). No block is used.
  def Ytilib.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line { |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    }
    seqs
  end

  # Reads a plain-text file into an Array with one stripped entry per
  # non-blank line.
  def Ytilib.read_plain2array(path)
    array = []
    File.foreach(path) { |line|
      stripped = line.strip
      array << stripped unless stripped.empty?
    }
    array
  end

  # Reads sequences from +path+, picking the reader from
  # File.ext_wo_name(path): "mfa", "fasta", "fa" -> read_mfa2array;
  # "plain", "txt" -> read_plain2array. Any other value raises through
  # checkerr.
  def Ytilib.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      return Ytilib.read_mfa2array(path)
    when "plain", "txt"
      return Ytilib.read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ in multi-FASTA form, replacing any existing file.
  #
  # For a Hash each header is ">#{prefix}#{name}"; for any other collection
  # the header is ">#{prefix}#{index}" with a 1-based index. Header and
  # sequence are each followed by the record separator $/. Returns nil.
  def Ytilib.write_mfa(seqs, path, prefix = " ")
    File.open(path, "w+") { |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key { |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        }
      else
        seqs.each_with_index { |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        }
      end
    }
    nil
  end

  # Builds an IUPAC consensus word for +seqs+: for every position of the
  # first sequence, the distinct letters found at that position across all
  # sequences are sorted, joined and looked up in the table below.
  #
  # Defined with a plain `def`, so this is an instance method of the module
  # and is reachable only where Ytilib is mixed in.
  #
  # Returns the String 'nil' (not the nil object) for an empty +seqs+.
  # Raises through checkerr, naming the offending letter set, when a position
  # holds a combination that has no entry in the table.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
                'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
                'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus = ''
    0.upto(seqs[0].size - 1) { |i|
      letters = []
      seqs.each do |word|
        letters << word[i] if !letters.include?(word[i])
      end
      letters.sort!
      letters_string = ''
      letters.each do |letter| letters_string << letter end
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { conslet[letters_string] == nil }
      new_consensus << conslet[letters_string]
    }
    new_consensus
  end

  # Opens a connection to +database+ using the MYSQL_HOST, MYSQL_USER and
  # MYSQL_PASSWORD constants, verifies it with "select 1" and returns it.
  # Raises through checkerr when the probe does not return "1".
  def Ytilib.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    my
  end

end
|
145
|
+
|
146
|
+
#report "ytilib required, working directory #{Dir.pwd}", "ytilib"
|
147
147
|
include Ytilib
|