sequence_logo 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -60
- data/Rakefile +5 -5
- data/TODO.txt +8 -0
- data/bin/glue_logos +3 -0
- data/bin/sequence_logo +3 -0
- data/lib/sequence_logo.rb +7 -7
- data/lib/sequence_logo/cli.rb +37 -0
- data/lib/sequence_logo/exec/glue_logos.rb +67 -0
- data/lib/sequence_logo/exec/sequence_logo.rb +52 -0
- data/lib/sequence_logo/pmflogo_lib.rb +95 -174
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib.rb +10 -9
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +1 -1
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -0
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/sequence_logo.gemspec +21 -21
- data/test/data/logo/AHR_si_direct.png +0 -0
- data/test/data/logo/AHR_si_revcomp.png +0 -0
- data/test/data/logo/AIRE_f2_direct.png +0 -0
- data/test/data/logo/AIRE_f2_revcomp.png +0 -0
- data/test/data/pcm/AHR_si.pcm +10 -0
- data/test/data/pcm/AIRE_f2.pcm +19 -0
- metadata +33 -32
- data/bin/create_all_logos +0 -3
- data/bin/generate_logo +0 -3
- data/bin/pmflogo +0 -3
- data/lib/sequence_logo/exec/create_all_logos.rb +0 -25
- data/lib/sequence_logo/exec/generate_logo.rb +0 -18
- data/lib/sequence_logo/exec/pmflogo.rb +0 -26
@@ -0,0 +1,85 @@
|
|
1
|
+
class Object
  # Returns a deep copy of the receiver, produced by serialising it with
  # Marshal and reading the serialised form back. Nested containers are
  # therefore copied as well, not shared with the original.
  #
  # Objects that Marshal cannot dump make Marshal.dump raise.
  def deep_dup
    serialized = Marshal.dump(self)
    Marshal.load(serialized)
  end
end
|
6
|
+
|
7
|
+
# Loads a motif from +in_file_name+ and returns the result of calling
# +get_ppm+ on it, or nil if anything goes wrong.
#
# The loader is chosen from the value returned by File.ext_wo_name:
#   'pat', 'pcm'            -> PM.load (followed by fixwc when words_count is set)
#   'mfa', 'fasta', 'plain' -> PM.new_pcm over Ytilib.read_seqs2array
#   'xml'                   -> PM.from_bismark on the "//PPM" element
#   matches the argument    -> the upcased argument is given to PPM.from_IUPAC
# NOTE(review): the last case presumably handles an IUPAC word passed in place
# of a file name -- confirm against File.ext_wo_name.
#
# Every StandardError is swallowed and reported as nil; this includes the case
# where no loader matched, because there is then no matrix to call get_ppm on.
def get_ppm_from_file(in_file_name)
  begin
    extension = File.ext_wo_name(in_file_name)
    matrix = nil
    if 'pat' == extension || 'pcm' == extension
      matrix = PM.load(in_file_name)
      matrix.fixwc if matrix.words_count
    elsif 'mfa' == extension || 'fasta' == extension || 'plain' == extension
      matrix = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
    elsif 'xml' == extension
      matrix = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
    elsif in_file_name === extension
      matrix = PPM.from_IUPAC(in_file_name.upcase)
    end
    matrix.get_ppm
  rescue
    nil
  end
end
|
23
|
+
|
24
|
+
# Logo-related additions to PPM. The class is reopened here; @matrix,
# icd4of4, infocod, words_count and revcomp! are expected to come from the
# rest of the class definition, which lives outside this file.
class PPM
  attr_accessor :name

  # A PPM is already a position probability matrix, so it converts to itself.
  def get_ppm
    self
  end

  # Relative distance of +v+ from the icd4of4 reference value:
  # |(v - icd4of4) / icd4of4|.
  #
  # When the reference is zero the ratio is undefined; 1.0 is returned in that
  # case, which is the same fallback get_logo_discrete uses for its scale.
  def get_line(v)
    reference = icd4of4
    return 1.0 if reference == 0
    ((v - reference) / reference).abs
  end

  # Builds the logo matrix for the requested mode ('weblogo' or 'discrete',
  # given as a String or Symbol). Returns nil for any other mode.
  def get_logo(icd_mode)
    case icd_mode.to_s
    when 'weblogo'
      get_logo_weblogo
    when 'discrete'
      get_logo_discrete
    end
  end

  # Scales every column by 2 + sum(p * log2(p)) over the four letters, then
  # halves it so that the resulting heights fit a '2 bit' scale.
  # Returns a hash 'A'/'C'/'G'/'T' => array of scaled values per position.
  def get_logo_weblogo
    positions = 0...@matrix['A'].size
    rseq = positions.map { |i|
      2 + ['A','C','G','T'].inject(0) { |sum, l|
        pn = @matrix[l][i]
        # a zero probability contributes nothing (and log(0) is not finite)
        sum + (pn == 0 ? 0 : pn * Math.log(pn) / Math.log(2))
      }
    }

    mat = {}
    ['A','C','G','T'].each { |l|
      mat[l] = positions.map { |i| @matrix[l][i] * rseq[i] / 2 }
    }
    mat
  end

  # Scales every column by get_line(infocod(i)).
  # Raises (through checkerr) when words_count is not set.
  # Returns a hash 'A'/'C'/'G'/'T' => array of scaled values per position.
  def get_logo_discrete
    checkerr("words count is undefined") { !words_count }

    positions = 0...@matrix['A'].size
    rseq = positions.map { |i| get_line(infocod(i)) }

    mat = {}
    ['A','C','G','T'].each { |l|
      mat[l] = positions.map { |i| @matrix[l][i] * rseq[i] }
    }
    mat
  end

  # Non-destructive counterpart of revcomp!: works on a deep copy.
  def revcomp
    deep_dup.revcomp!
  end
end
|
@@ -1,131 +1,131 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
module Ytilib

  # Seeds Ruby's global random number generator when this file is loaded.
  srand

  # Helpers for estimating nucleotide (A/C/G/T) frequencies from sequences and
  # for generating random sequences from such frequencies.
  module Randoom

    # NOTE: a bare `private` only affects instance methods defined with a plain
    # `def`; the singleton methods below (`def Randoom.x`) stay publicly callable.
    private

    # Returns a fresh counter hash with a zero entry for A, C, G, T and N.
    def Randoom.new_counts
      { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
    end

    # Draws one letter using the 'A', 'C' and 'G' entries of +probs+ as
    # cumulative thresholds; 'T' is returned when none of them is hit.
    def Randoom.random_letter(probs)
      random = rand()
      return 'A' if random < probs['A']
      return 'C' if random < probs['A'] + probs['C']
      return 'G' if random < probs['A'] + probs['C'] + probs['G']
      return 'T'
    end

    public

    # Counts the (upcased) letters of +input+ and converts the counts into
    # A/C/G/T probabilities with make_probs!. Letters other than A, C, G, T
    # and N are counted under their own key and do not enter the result.
    def Randoom.calc_probs(input)
      counts = new_counts
      counts.default = 0
      (0...input.length).each { |i|
        counts[input[i,1].upcase] += 1
      }
      return make_probs!(counts)
    end

    # Generates a random sequence of +req_len+ letters.
    #
    # Without +probs_m+ every letter is drawn independently from +probs+.
    # With +probs_m+ (a hash: previous letter => probabilities of the next one)
    # the first letter is drawn from +probs+ and each following letter from the
    # row of +probs_m+ that belongs to the letter before it.
    #
    # A non-positive +req_len+ yields an empty string in both modes.
    def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
      randoom = ''
      # without this guard the chained mode below would still emit its first letter
      return randoom if req_len <= 0

      if probs_m.nil?
        req_len.times { randoom << random_letter(probs) }
        return randoom
      end

      random_l = random_letter(probs)
      randoom << random_l
      (req_len-1).times {
        random_l = random_letter(probs_m[random_l])
        randoom << random_l
      }
      return randoom
    end

    # Counts adjacent letter pairs of +input+ and returns, for each first
    # letter A/C/G/T, the make_probs! probabilities of the letter following it.
    # Raises NoMethodError if +input+ contains a letter other than A, C, G, T, N.
    def Randoom.calc_probs_m(input)
      counts = {}
      ['A','C','G','T','N'].each { |l| counts[l] = new_counts }
      (0...input.length-1).each { |i|
        pair = input[i, 2].upcase
        counts[pair[0,1]][pair[1,1]] += 1
      }

      probs_m = {}
      ['A','C','G','T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
      return probs_m
    end

    # Spreads the counts of the 'N' row evenly (a quarter each) over the A, C,
    # G and T rows of +counts+, then converts each of those rows with
    # make_probs!. +counts+ is modified in place.
    def Randoom.make_probs_m!(counts)
      ['A','C','G','T','N'].each { |l2|
        addv = counts['N'][l2] / 4.0
        ['A','C','G','T'].each { |l1|
          counts[l1][l2] += addv
        }
      }

      probs_m = {}
      ['A','C','G','T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
      return probs_m
    end

    # Converts letter counts into A/C/G/T probabilities.
    #
    # +length+ defaults to the sum of the A, C, G, T and N counts. A quarter of
    # the N count is added to each of the four letters (this modifies +counts+).
    # 'T' is computed as the remainder so the four values sum to one.
    # When +length+ is zero, all-zero probabilities are returned.
    def Randoom.make_probs!(counts, length = nil)
      probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
      length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length.nil?
      length = length.to_f
      ['A','C','G','T'].each { |l| counts[l] += counts['N'] / 4.0 }
      return probs if length == 0
      probs['A'] = counts['A'] / length
      probs['C'] = counts['C'] / length
      probs['G'] = counts['G'] / length
      probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
      return probs
    end

    # Replaces A and T by their mean and C and G by their mean, in place.
    def Randoom.equalize!(probs)
      probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
      probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
      return probs
    end

    # Alias-style wrapper around equalize!.
    def Randoom.twostrand!(probs)
      return Randoom.equalize!(probs)
    end

    DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

    # probabilities counted without _random.fa files for human genome
    DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
    DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}

    DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
      "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
      "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
      "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}

    HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
    HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}

    HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
      "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
      "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
      "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}

    HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
    HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}

    MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
    MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}

    MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}

  end

end
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
module Ytilib

  # Seeds Ruby's global random number generator when this file is loaded.
  srand

  # Helpers for estimating nucleotide (A/C/G/T) frequencies from sequences and
  # for generating random sequences from such frequencies.
  module Randoom

    # NOTE: a bare `private` only affects instance methods defined with a plain
    # `def`; the singleton methods below (`def Randoom.x`) stay publicly callable.
    private

    # Returns a fresh counter hash with a zero entry for A, C, G, T and N.
    def Randoom.new_counts
      { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
    end

    # Draws one letter using the 'A', 'C' and 'G' entries of +probs+ as
    # cumulative thresholds; 'T' is returned when none of them is hit.
    def Randoom.random_letter(probs)
      random = rand()
      return 'A' if random < probs['A']
      return 'C' if random < probs['A'] + probs['C']
      return 'G' if random < probs['A'] + probs['C'] + probs['G']
      return 'T'
    end

    public

    # Counts the (upcased) letters of +input+ and converts the counts into
    # A/C/G/T probabilities with make_probs!. Letters other than A, C, G, T
    # and N are counted under their own key and do not enter the result.
    def Randoom.calc_probs(input)
      counts = new_counts
      counts.default = 0
      (0...input.length).each { |i|
        counts[input[i,1].upcase] += 1
      }
      return make_probs!(counts)
    end

    # Generates a random sequence of +req_len+ letters.
    #
    # Without +probs_m+ every letter is drawn independently from +probs+.
    # With +probs_m+ (a hash: previous letter => probabilities of the next one)
    # the first letter is drawn from +probs+ and each following letter from the
    # row of +probs_m+ that belongs to the letter before it.
    #
    # A non-positive +req_len+ yields an empty string in both modes.
    def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
      randoom = ''
      # without this guard the chained mode below would still emit its first letter
      return randoom if req_len <= 0

      if probs_m.nil?
        req_len.times { randoom << random_letter(probs) }
        return randoom
      end

      random_l = random_letter(probs)
      randoom << random_l
      (req_len-1).times {
        random_l = random_letter(probs_m[random_l])
        randoom << random_l
      }
      return randoom
    end

    # Counts adjacent letter pairs of +input+ and returns, for each first
    # letter A/C/G/T, the make_probs! probabilities of the letter following it.
    # Raises NoMethodError if +input+ contains a letter other than A, C, G, T, N.
    def Randoom.calc_probs_m(input)
      counts = {}
      ['A','C','G','T','N'].each { |l| counts[l] = new_counts }
      (0...input.length-1).each { |i|
        pair = input[i, 2].upcase
        counts[pair[0,1]][pair[1,1]] += 1
      }

      probs_m = {}
      ['A','C','G','T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
      return probs_m
    end

    # Spreads the counts of the 'N' row evenly (a quarter each) over the A, C,
    # G and T rows of +counts+, then converts each of those rows with
    # make_probs!. +counts+ is modified in place.
    def Randoom.make_probs_m!(counts)
      ['A','C','G','T','N'].each { |l2|
        addv = counts['N'][l2] / 4.0
        ['A','C','G','T'].each { |l1|
          counts[l1][l2] += addv
        }
      }

      probs_m = {}
      ['A','C','G','T'].each { |l| probs_m[l] = make_probs!(counts[l]) }
      return probs_m
    end

    # Converts letter counts into A/C/G/T probabilities.
    #
    # +length+ defaults to the sum of the A, C, G, T and N counts. A quarter of
    # the N count is added to each of the four letters (this modifies +counts+).
    # 'T' is computed as the remainder so the four values sum to one.
    # When +length+ is zero, all-zero probabilities are returned.
    def Randoom.make_probs!(counts, length = nil)
      probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
      length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length.nil?
      length = length.to_f
      ['A','C','G','T'].each { |l| counts[l] += counts['N'] / 4.0 }
      return probs if length == 0
      probs['A'] = counts['A'] / length
      probs['C'] = counts['C'] / length
      probs['G'] = counts['G'] / length
      probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
      return probs
    end

    # Replaces A and T by their mean and C and G by their mean, in place.
    def Randoom.equalize!(probs)
      probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
      probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
      return probs
    end

    # Alias-style wrapper around equalize!.
    def Randoom.twostrand!(probs)
      return Randoom.equalize!(probs)
    end

    DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

    # probabilities counted without _random.fa files for human genome
    DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
    DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}

    DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
      "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
      "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
      "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}

    HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
    HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}

    HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
      "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
      "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
      "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}

    HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
    HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}

    MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
    MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}

    MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996}, "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645}, "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154}, "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}

  end

end
|
@@ -1,147 +1,147 @@
|
|
1
|
-
module Ytilib
  # Current local time formatted as "DD Mon HH:MM:SS", the timestamp used in
  # the "LLIB ..." log lines.
  def self.time
    now = Time.now
    now.strftime('%d %b %H:%M:%S')
  end
end
|
6
|
-
|
7
|
-
# Bracketed program tag ("[name]") inserted into log lines by report and
# checkerr; set by start and by report when a program name is passed.
$program_name = nil
|
8
|
-
|
9
|
-
# Logs the invocation (script path followed by the command-line arguments) and
# then, unless reporting is disabled through $NO_REPORT, sets $program_name to
# the bracketed result of File.name_wo_ext(fullpath).
def start(fullpath)
  arguments = ARGV.map { |arg| " " + arg }.join
  report(fullpath + arguments)
  $program_name = "[#{File.name_wo_ext(fullpath)}]" unless $NO_REPORT
end
|
14
|
-
|
15
|
-
# Prints a timestamped log line "LLIB <time> <program tag>\t<message>".
#
# message      - text to log
# program_name - when given, $program_name is first replaced by "[program_name]"
#
# Nothing is printed when $NO_REPORT is set. If a block is given, the line is
# printed only when the block returns a truthy value. Always returns nil.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return nil if $NO_REPORT

  wanted = block_given? ? yield : true
  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if wanted
end
|
20
|
-
|
21
|
-
# Raises a RuntimeError carrying +message+ when called without a block, or
# when the given block returns a truthy value (the block describes the error
# condition). The error is also printed first unless $NO_REPORT is set.
# Returns nil when the block reports no error.
def checkerr(message = "checkerr failed")
  return if block_given? && !yield

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
|
27
|
-
|
28
|
-
# Sequence file helpers (multi-FASTA and plain lists), consensus building and a
# MySQL connection helper.
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a Hash: sequence name => sequence.
  #
  # A ">" line starts a new record; following lines are stripped and appended
  # to it. Lines before the first ">" are ignored. If a block is given it
  # receives each name and its return value is used as the key instead.
  # Raises (through checkerr) when a name occurs twice.
  def Ytilib.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    # block form: the handle is closed even if checkerr raises
    File.open(path, "r") { |input_fasta_f|
      input_fasta_f.each_line { |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          seq_name = yield seq_name if block_given?
          checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
          seqs[seq_name] = ""
        elsif seq_name != nil
          seqs[seq_name] << line.strip
        end
      }
    }
    return seqs
  end

  # Reads a multi-FASTA file into an Array of sequences, in file order.
  # If a block is given it is called with each record name.
  def Ytilib.read_mfa2array(path)
    seqs, seq_name = [], nil
    # block form: the handle is closed even if the caller's block raises
    File.open(path, "r") { |input_fasta_f|
      input_fasta_f.each_line { |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          yield seq_name if block_given?
          seqs << ""
        elsif seq_name != nil
          seqs.last << line.strip
        end
      }
    }
    return seqs
  end

  # Same as read_mfa2array, but parses multi-FASTA text that is already in
  # memory (anything responding to each_line).
  def Ytilib.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line { |line|
      if line[0,1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    }
    return seqs
  end

  # Reads a plain text file into an Array holding every non-blank line,
  # stripped of surrounding whitespace.
  def Ytilib.read_plain2array(path)
    array = []
    # File.foreach closes the file when iteration ends
    File.foreach(path) { |line|
      stripped = line.strip
      array << stripped if !stripped.empty?
    }
    return array
  end

  # Reads sequences from +path+, choosing the reader from File.ext_wo_name:
  # "mfa"/"fasta"/"fa" -> read_mfa2array, "plain"/"txt" -> read_plain2array.
  # Any other value raises through checkerr.
  def Ytilib.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      return Ytilib.read_mfa2array(path)
    when "plain","txt"
      return Ytilib.read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ in multi-FASTA format.
  #
  # For a Hash the header is ">#{prefix}#{name}"; for any other collection the
  # sequences are numbered from 1: ">#{prefix}#{index}". Returns nil.
  def Ytilib.write_mfa(seqs, path, prefix = " ")
    # block form: the handle is closed even if writing fails part-way
    File.open(path, "w+") { |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key { |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        }
      else
        seqs.each_with_index { |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        }
      end
    }
    nil
  end

  # Builds a consensus word for +seqs+: for every index of the first sequence
  # the distinct letters found at that index are sorted, concatenated and
  # looked up in the table below.
  #
  # Returns the String 'nil' (not the nil object) for an empty +seqs+.
  # Raises through checkerr when a letter combination is not in the table.
  # Logs a warning on every call, because the method is considered unchecked.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
      'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
      'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus, letters = '', []
    0.upto(seqs[0].size-1) { |i|
      seqs.each do |word|
        letters << word[i] if !letters.include?(word[i])
      end
      letters.sort!
      letters_string = ''
      letters.each do |letter| letters_string << letter end
      # include the offending letter set in the message
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { conslet[letters_string] == nil }
      new_consensus << conslet[letters_string]
      letters.clear
    }
    return new_consensus
  end

  # Opens a MySQL connection to +database+ using the MYSQL_HOST, MYSQL_USER and
  # MYSQL_PASSWORD constants (not defined in this file) and verifies it with a
  # "select 1" round trip. Raises through checkerr if the check fails.
  def Ytilib.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    return my
  end


end
|
145
|
-
|
146
|
-
# Load-time log line; passing "ytilib" also sets $program_name to "[ytilib]".
report "ytilib required, working directory #{Dir.pwd}", "ytilib"
|
1
|
+
module Ytilib
  # Current local time formatted as "DD Mon HH:MM:SS", the timestamp used in
  # the "LLIB ..." log lines.
  def self.time
    now = Time.now
    now.strftime('%d %b %H:%M:%S')
  end
end
|
6
|
+
|
7
|
+
# Bracketed program tag ("[name]") inserted into log lines by report and
# checkerr; set by start and by report when a program name is passed.
$program_name = nil
|
8
|
+
|
9
|
+
# Logs the invocation (script path followed by the command-line arguments) and
# then, unless reporting is disabled through $NO_REPORT, sets $program_name to
# the bracketed result of File.name_wo_ext(fullpath).
def start(fullpath)
  arguments = ARGV.map { |arg| " " + arg }.join
  report(fullpath + arguments)
  $program_name = "[#{File.name_wo_ext(fullpath)}]" unless $NO_REPORT
end
|
14
|
+
|
15
|
+
# Prints a timestamped log line "LLIB <time> <program tag>\t<message>".
#
# message      - text to log
# program_name - when given, $program_name is first replaced by "[program_name]"
#
# Nothing is printed when $NO_REPORT is set. If a block is given, the line is
# printed only when the block returns a truthy value. Always returns nil.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return nil if $NO_REPORT

  wanted = block_given? ? yield : true
  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}" if wanted
end
|
20
|
+
|
21
|
+
# Raises a RuntimeError carrying +message+ when called without a block, or
# when the given block returns a truthy value (the block describes the error
# condition). The error is also printed first unless $NO_REPORT is set.
# Returns nil when the block reports no error.
def checkerr(message = "checkerr failed")
  return if block_given? && !yield

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
|
27
|
+
|
28
|
+
# Sequence file helpers (multi-FASTA and plain lists), consensus building and a
# MySQL connection helper.
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a Hash: sequence name => sequence.
  #
  # A ">" line starts a new record; following lines are stripped and appended
  # to it. Lines before the first ">" are ignored. If a block is given it
  # receives each name and its return value is used as the key instead.
  # Raises (through checkerr) when a name occurs twice.
  def Ytilib.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    # block form: the handle is closed even if checkerr raises
    File.open(path, "r") { |input_fasta_f|
      input_fasta_f.each_line { |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          seq_name = yield seq_name if block_given?
          checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
          seqs[seq_name] = ""
        elsif seq_name != nil
          seqs[seq_name] << line.strip
        end
      }
    }
    return seqs
  end

  # Reads a multi-FASTA file into an Array of sequences, in file order.
  # If a block is given it is called with each record name.
  def Ytilib.read_mfa2array(path)
    seqs, seq_name = [], nil
    # block form: the handle is closed even if the caller's block raises
    File.open(path, "r") { |input_fasta_f|
      input_fasta_f.each_line { |line|
        if line[0,1] == ">"
          seq_name = line[1..-1].strip
          yield seq_name if block_given?
          seqs << ""
        elsif seq_name != nil
          seqs.last << line.strip
        end
      }
    }
    return seqs
  end

  # Same as read_mfa2array, but parses multi-FASTA text that is already in
  # memory (anything responding to each_line).
  def Ytilib.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line { |line|
      if line[0,1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif seq_name != nil
        seqs.last << line.strip
      end
    }
    return seqs
  end

  # Reads a plain text file into an Array holding every non-blank line,
  # stripped of surrounding whitespace.
  def Ytilib.read_plain2array(path)
    array = []
    # File.foreach closes the file when iteration ends
    File.foreach(path) { |line|
      stripped = line.strip
      array << stripped if !stripped.empty?
    }
    return array
  end

  # Reads sequences from +path+, choosing the reader from File.ext_wo_name:
  # "mfa"/"fasta"/"fa" -> read_mfa2array, "plain"/"txt" -> read_plain2array.
  # Any other value raises through checkerr.
  def Ytilib.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      return Ytilib.read_mfa2array(path)
    when "plain","txt"
      return Ytilib.read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ in multi-FASTA format.
  #
  # For a Hash the header is ">#{prefix}#{name}"; for any other collection the
  # sequences are numbered from 1: ">#{prefix}#{index}". Returns nil.
  def Ytilib.write_mfa(seqs, path, prefix = " ")
    # block form: the handle is closed even if writing fails part-way
    File.open(path, "w+") { |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key { |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        }
      else
        seqs.each_with_index { |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        }
      end
    }
    nil
  end

  # Builds a consensus word for +seqs+: for every index of the first sequence
  # the distinct letters found at that index are sorted, concatenated and
  # looked up in the table below.
  #
  # Returns the String 'nil' (not the nil object) for an empty +seqs+.
  # Raises through checkerr when a letter combination is not in the table.
  # Logs a warning on every call, because the method is considered unchecked.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
      'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
      'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus, letters = '', []
    0.upto(seqs[0].size-1) { |i|
      seqs.each do |word|
        letters << word[i] if !letters.include?(word[i])
      end
      letters.sort!
      letters_string = ''
      letters.each do |letter| letters_string << letter end
      # include the offending letter set in the message
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { conslet[letters_string] == nil }
      new_consensus << conslet[letters_string]
      letters.clear
    }
    return new_consensus
  end

  # Opens a MySQL connection to +database+ using the MYSQL_HOST, MYSQL_USER and
  # MYSQL_PASSWORD constants (not defined in this file) and verifies it with a
  # "select 1" round trip. Raises through checkerr if the check fails.
  def Ytilib.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    return my
  end


end
|
145
|
+
|
146
|
+
#report "ytilib required, working directory #{Dir.pwd}", "ytilib"
|
147
147
|
# Top-level include: mixes Ytilib into Object, so its instance methods
# (e.g. get_consensus) and constants can be used without a receiver.
include Ytilib
|