sequence_logo 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/ruby
2
module Ytilib
  class PM
    # Estimates the standard deviation of the scores this matrix gives to words.
    #
    # When the matrix is at most 10 positions wide and +approx+ is false, every
    # one of the 4**@size words over ACGT is scored. Otherwise +trycount+ random
    # words are drawn with Randoom.rand_seq, using +bg+ as the letter
    # probabilities when it is given.
    #
    # Yields (sigma, mean) when a block is given. Returns sigma.
    def score_sigma(trycount = 4**10, approx = false, bg = nil)
      scores = []
      if @size <= 10 && !approx
        (0...4**@size).each do |i|
          # The base-4 digits of i, left-padded to @size, enumerate every word.
          word = i.to_s(4).rjust(@size, "0").tr("0123", "ACGT")
          scores << score(word)
        end
      else
        trycount.times do
          word = bg ? Randoom.rand_seq(@size, bg) : Randoom.rand_seq(@size)
          scores << score(word)
        end
      end

      count = scores.size
      # Force float arithmetic: with integer scores the original integer
      # division truncated both the mean and the variance.
      mean = scores.inject(0) { |sum, s| sum + s }.to_f / count

      sq_dev_sum, dev_sum = 0, 0
      scores.each do |s|
        dev = s - mean
        sq_dev_sum += dev**2
        dev_sum += dev
      end
      # dev_sum would be zero in exact arithmetic; subtracting its square
      # compensates for rounding error in the computed mean.
      variance = (sq_dev_sum - dev_sum**2 / count) / (count - 1)

      sigma = Math.sqrt(variance)
      yield(sigma, mean) if block_given?
      sigma
    end

    # Computes the standard deviation of the scores of all 4**@size words in a
    # single pass over recursive_walk, updating a running mean and a running
    # sum of squared deviations.
    #
    # Yields (sigma, mean) when a block is given. Returns sigma in either case
    # (the original returned nil when no block was passed).
    def fast_score_sigma
      # Float accumulators keep delta / n from truncating for integer matrices.
      n, mean, m2 = 0, 0.0, 0.0

      recursive_walk([matrix['A'], matrix['C'], matrix['G'], matrix['T']], 0, 0) do |x|
        n += 1
        delta = x - mean
        mean += delta / n
        m2 += delta * (x - mean)
      end

      sigma = Math.sqrt(m2 / (n - 1))
      yield(sigma, mean) if block_given?
      sigma
    end

    # Two-pass variant of fast_score_sigma: the first walk computes the mean,
    # the second walk sums squared deviations from that final mean.
    #
    # Yields (sigma, mean) when a block is given. Returns sigma in either case
    # (the original returned nil when no block was passed).
    def fast_score_sigma_precise
      rows = [matrix['A'], matrix['C'], matrix['G'], matrix['T']]

      n, mean = 0, 0.0
      recursive_walk(rows, 0, 0) do |x|
        n += 1
        mean += (x - mean) / n
      end

      n, m2 = 0, 0.0
      recursive_walk(rows, 0, 0) do |x|
        n += 1
        delta = x - mean
        m2 += delta * delta
      end

      sigma = Math.sqrt(m2 / (n - 1))
      yield(sigma, mean) if block_given?
      sigma
    end

    private

    # Depth-first enumeration of every word of length @size: at position +i+
    # each of the four rows of +matrix+ contributes matrix[row][i] to +score+.
    # The accumulated score of each complete word is passed to the block.
    #
    # Raises RuntimeError when called without a block. The check is done up
    # front; previously it was only reached at the leaves, after inner yields
    # had already failed for @size > 0.
    def recursive_walk(matrix, score, i, &block)
      raise "no block for recursive walk" unless block

      if i < @size
        # Forward the same block instead of wrapping it once per level.
        (0..3).each do |row|
          recursive_walk(matrix, score + matrix[row][i], i + 1, &block)
        end
      else
        block.call(score)
      end
    end

  end
end
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/ruby
2
module Ytilib

  srand

  # Random nucleotide sequence generation and helpers that turn letter counts
  # into probability tables (single-letter and first-order transition tables).
  module Randoom

    # NOTE: a bare `private` / `public` does not change the visibility of
    # methods defined as `def Randoom.name`; the markers are kept as they were.
    private

    # Returns a fresh zeroed count table for A, C, G, T and N.
    def Randoom.new_counts
      { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0, 'N' => 0 }
    end

    # Draws one letter according to +probs+ (keys 'A', 'C', 'G'; 'T' takes
    # whatever probability mass remains).
    def Randoom.random_letter(probs)
      random = rand
      return 'A' if random < probs['A']
      return 'C' if random < probs['A'] + probs['C']
      return 'G' if random < probs['A'] + probs['C'] + probs['G']
      'T'
    end

    public

    # Counts the letters of +input+ (case-insensitively) and converts the
    # counts to probabilities with make_probs!. Letters other than A, C, G, T
    # and N are counted under their own key and ignored by make_probs!.
    def Randoom.calc_probs(input)
      counts = new_counts
      counts.default = 0
      (0...input.length).each do |i|
        counts[input[i, 1].upcase] += 1
      end
      make_probs!(counts)
    end

    # Generates a random sequence of +req_len+ letters.
    #
    # Without +probs_m+ every letter is drawn independently from +probs+.
    # With +probs_m+ (a hash of per-letter probability tables) the first letter
    # is drawn from +probs+ and each following letter from the table of the
    # letter before it.
    #
    # Returns an empty string when +req_len+ is less than 1 in both modes
    # (the Markov branch used to return a single letter for req_len == 0).
    def Randoom.rand_seq(req_len, probs = DEF_PROBS, probs_m = nil)
      randoom = ''
      if probs_m.nil?
        req_len.times { randoom << random_letter(probs) }
        return randoom
      end
      return randoom if req_len < 1

      random_l = random_letter(probs)
      # Append to our own buffer rather than aliasing the string returned by
      # random_letter.
      randoom << random_l
      (req_len - 1).times do
        random_l = random_letter(probs_m[random_l])
        randoom << random_l
      end
      randoom
    end

    # Builds first-order transition probabilities from the adjacent letter
    # pairs of +input+. Pairs starting with 'N' are counted, but only the
    # A, C, G and T rows are converted and returned.
    def Randoom.calc_probs_m(input)
      counts = {}
      %w[A C G T N].each { |letter| counts[letter] = new_counts }
      (0...input.length - 1).each do |i|
        pair = input[i, 2].upcase
        counts[pair[0, 1]][pair[1, 1]] += 1
      end

      probs_m = {}
      %w[A C G T].each { |letter| probs_m[letter] = make_probs!(counts[letter]) }
      probs_m
    end

    # Converts a table of pair counts into transition probabilities. A quarter
    # of every count in the 'N' row is first added to the same column of each
    # of the A, C, G and T rows; +counts+ is modified in place.
    def Randoom.make_probs_m!(counts)
      %w[A C G T N].each do |l2|
        addv = counts['N'][l2] / 4.0
        %w[A C G T].each { |l1| counts[l1][l2] += addv }
      end

      probs_m = {}
      %w[A C G T].each { |letter| probs_m[letter] = make_probs!(counts[letter]) }
      probs_m
    end

    # Converts letter counts into probabilities.
    #
    # +length+ defaults to the sum of the A, C, G, T and N counts. A quarter of
    # the 'N' count is added to each of A, C, G and T in +counts+ (modifying
    # it in place) before dividing. Returns an all-zero table when +length+ is
    # 0. 'T' is computed as the remainder so the four values sum to 1.
    def Randoom.make_probs!(counts, length = nil)
      probs = { 'A' => 0, 'C' => 0, 'G' => 0, 'T' => 0 }
      length = counts['A'] + counts['C'] + counts['G'] + counts['T'] + counts['N'] if length.nil?
      length = length.to_f
      %w[A C G T].each { |l| counts[l] += counts['N'] / 4.0 }
      return probs if length == 0
      probs['A'] = counts['A'] / length
      probs['C'] = counts['C'] / length
      probs['G'] = counts['G'] / length
      probs['T'] = 1 - probs['A'] - probs['C'] - probs['G']
      probs
    end

    # Replaces the A and T probabilities with their average, and likewise C
    # and G. Modifies +probs+ in place and returns it.
    def Randoom.equalize!(probs)
      probs['A'] = probs['T'] = (probs['A'] + probs['T']) / 2
      probs['C'] = probs['G'] = (probs['C'] + probs['G']) / 2
      probs
    end

    # Alias for equalize!.
    def Randoom.twostrand!(probs)
      Randoom.equalize!(probs)
    end

    # The tables below are left unfrozen because equalize! and make_probs!
    # modify their argument in place.
    DEF_PROBS = PPM.probs2IUPAC!({ 'A' => 0.25, 'C' => 0.25, 'G' => 0.25, 'T' => 0.25, 'N' => 0.25 })

    # probabilities counted without _random.fa files for human genome
    DMEL40_PROBS1 = {"A"=>0.287729562173578, "C"=>0.21236364146414, "G"=>0.212259972960341, "T"=>0.287646823401942}
    DMEL40_PROBS2 = {"A"=>0.28768819278776, "C"=>0.21231180721224, "G"=>0.21231180721224, "T"=>0.28768819278776}

    DMEL40_PROBS1_M = {"A"=>{"A"=>0.350403075314602, "C"=>0.181194374386404, "G"=>0.188361404205017, "T"=>0.280041146093977},
                       "C"=>{"A"=>0.325366772443085, "C"=>0.222264645612127, "G"=>0.197213801868993, "T"=>0.255154780075794},
                       "G"=>{"A"=>0.260710563672393, "C"=>0.27150575901391, "G"=>0.222294234776053, "T"=>0.245489442537644},
                       "T"=>{"A"=>0.217189093089999, "C"=>0.192590127484359, "G"=>0.239869076706963, "T"=>0.350351702718679}}

    HG17_PROBS1 = {"A"=>0.295309361730334, "C"=>0.204413561169847, "G"=>0.204519414193999, "T"=>0.295757662905821}
    HG17_PROBS2 = {"A"=>0.295533512318077, "C"=>0.204466487681923, "G"=>0.204466487681923, "T"=>0.295533512318077}

    HG17_PROBS1_M = {"A"=>{"A"=>0.331091206257755, "C"=>0.170458424092748, "G"=>0.236770972081246, "T"=>0.261679397568252},
                     "C"=>{"A"=>0.354813019140533, "C"=>0.254741288394943, "G"=>0.0481667110625576, "T"=>0.342278981401966},
                     "G"=>{"A"=>0.290057117684408, "C"=>0.208514091370804, "G"=>0.254732297362797, "T"=>0.246696493581991},
                     "T"=>{"A"=>0.222087715262152, "C"=>0.200697606508443, "G"=>0.245657322003887, "T"=>0.331557356225517}}

    HG18_PROBS1 = {"A"=>0.291900580635872, "C"=>0.207855064518284, "G"=>0.207968587245859, "T"=>0.292275767599985}
    HG18_PROBS2 = {"A"=>0.292088174117929, "C"=>0.207911825882071, "G"=>0.207911825882071, "T"=>0.292088174117929}

    MM9_PROBS1 = {"A"=>0.289755259854654, "C"=>0.210085673636132, "G"=>0.210143929198141, "T"=>0.290015137311074}
    MM9_PROBS2 = {"A"=>0.289885198582864, "C"=>0.210114801417136, "G"=>0.210114801417136, "T"=>0.289885198582864}

    MM9_PROBS1_M = {"A"=>{"A"=>0.310389104265713, "C"=>0.184962574392377, "G"=>0.251904718465914, "T"=>0.252743602875996},
                    "C"=>{"A"=>0.352189584318682, "C"=>0.250794045222924, "G"=>0.0494404816637487, "T"=>0.347575888794645},
                    "G"=>{"A"=>0.295931117515178, "C"=>0.197870954111653, "G"=>0.250756985626016, "T"=>0.255440942747154},
                    "T"=>{"A"=>0.219437756702452, "C"=>0.214548041970626, "G"=>0.255405334730743, "T"=>0.310608866596179}}

  end

end
@@ -0,0 +1,147 @@
1
module Ytilib
  # Current local time as a short log timestamp: day of month, abbreviated
  # month name and HH:MM:SS.
  def self.time
    Time.now.strftime('%d %b %H:%M:%S')
  end
end
6
+
7
$program_name = nil

# Reports the command line (+fullpath+ followed by every ARGV entry, each
# preceded by a space) and, unless $NO_REPORT is set, stores the bracketed
# program name derived from +fullpath+ in $program_name.
def start(fullpath)
  arguments = ARGV.map { |arg| " " + arg }.join
  report(fullpath + arguments)
  $program_name = "[#{File.name_wo_ext(fullpath)}]" unless $NO_REPORT
end
14
+
15
# Prints a timestamped "LLIB" log line for +message+.
#
# When +program_name+ is given it first replaces $program_name (in brackets).
# Nothing is printed when $NO_REPORT is set, or when a block is given and it
# returns a falsy value.
def report(message, program_name = nil)
  $program_name = "[#{program_name}]" unless program_name.nil?
  return if $NO_REPORT
  return if block_given? && !yield

  puts "LLIB #{Ytilib.time} #{$program_name}\t#{message}"
end
20
+
21
# Raises a RuntimeError carrying +message+ when called without a block, or
# when the given block returns a truthy value. The error line is also printed
# unless $NO_REPORT is set. Returns nil when the block returns a falsy value.
def checkerr(message = "checkerr failed")
  return if block_given? && !yield

  puts "LLIB #{Ytilib.time} [error]\t#{message}" unless $NO_REPORT
  raise "LLIB #{Ytilib.time} #{$program_name}\n\t#{message}\n"
end
27
+
28
module Ytilib

  STRAND_DIRECT = "direct"
  STRAND_REVCOMP = "revcomp"

  # Reads a multi-FASTA file into a hash of sequence name => sequence.
  #
  # Each header line (starting with ">") opens a new sequence; when a block is
  # given the stripped header is passed to it and the block's result is used
  # as the name. Following lines are stripped and concatenated. Lines before
  # the first header are ignored. A repeated name is reported via checkerr.
  def Ytilib.read_mfa2hash(path)
    seqs, seq_name = {}, nil
    # Block form closes the file even when checkerr or the caller's block raises.
    File.open(path, "r") do |input_fasta_f|
      input_fasta_f.each_line do |line|
        if line[0, 1] == ">"
          seq_name = line[1..-1].strip
          seq_name = yield seq_name if block_given?
          checkerr("multiple sequences with the same name=#{seq_name}") { seqs[seq_name] }
          seqs[seq_name] = ""
        elsif !seq_name.nil?
          seqs[seq_name] << line.strip
        end
      end
    end
    seqs
  end

  # Reads a multi-FASTA file into an array of sequences, in file order.
  # When a block is given it is called with each stripped header; its result
  # is not used. Lines before the first header are ignored.
  def Ytilib.read_mfa2array(path)
    seqs, seq_name = [], nil
    # Block form closes the file even when the caller's block raises.
    File.open(path, "r") do |input_fasta_f|
      input_fasta_f.each_line do |line|
        if line[0, 1] == ">"
          seq_name = line[1..-1].strip
          yield seq_name if block_given?
          seqs << ""
        elsif !seq_name.nil?
          seqs.last << line.strip
        end
      end
    end
    seqs
  end

  # Parses multi-FASTA text (anything responding to each_line) into an array
  # of sequences, in input order. Lines before the first header are ignored.
  def Ytilib.mfa2array(input)
    seqs, seq_name = [], nil
    input.each_line do |line|
      if line[0, 1] == ">"
        seq_name = line[1..-1].strip
        seqs << ""
      elsif !seq_name.nil?
        seqs.last << line.strip
      end
    end
    seqs
  end

  # Reads a plain text file into an array of its stripped, non-empty lines.
  def Ytilib.read_plain2array(path)
    array = []
    # File.foreach closes the file when iteration finishes; the original
    # File.open(path).each_line left the handle open.
    File.foreach(path) do |line|
      stripped = line.strip
      array << stripped unless stripped.empty?
    end
    array
  end

  # Reads sequences from +path+, choosing the reader by the file extension
  # returned by File.ext_wo_name: "mfa", "fasta" and "fa" use read_mfa2array,
  # "plain" and "txt" use read_plain2array; anything else goes to checkerr.
  def Ytilib.read_seqs2array(path)
    type = File.ext_wo_name(path)
    case type
    when "mfa", "fasta", "fa"
      return Ytilib.read_mfa2array(path)
    when "plain", "txt"
      return Ytilib.read_plain2array(path)
    else
      checkerr("unknown sequences-file, ext=#{type}")
    end
  end

  # Writes +seqs+ to +path+ in multi-FASTA format.
  #
  # For a Hash the keys are used as names; for any other collection the
  # sequences are numbered from 1. Every header is ">" + +prefix+ + name.
  # Returns nil.
  def Ytilib.write_mfa(seqs, path, prefix = " ")
    # Block form closes the file even when writing raises.
    File.open(path, "w+") do |out_fasta_f|
      if seqs.is_a?(Hash)
        seqs.each_key do |name|
          out_fasta_f << ">#{prefix}#{name}" << $/ << seqs[name] << $/
        end
      else
        seqs.each_with_index do |seq, i|
          out_fasta_f << ">#{prefix}#{i+1}" << $/ << seq << $/
        end
      end
    end
    nil
  end

  # Builds an IUPAC consensus for equally aligned +seqs+: for every position
  # of the first sequence, the set of distinct letters seen at that position
  # is mapped to a single IUPAC letter.
  #
  # Returns the string 'nil' for an empty collection. A letter set with no
  # IUPAC code is reported via checkerr, now including the offending set.
  def get_consensus(seqs)
    report "consensus creating method should be checked, you are using unsafe code"
    return 'nil' if seqs.size == 0
    conslet = { 'A' => 'A', 'C' => 'C', 'G' => 'G', 'T' => 'T', 'U' => 'U',
      'AG' => 'R', 'CT' => 'Y', 'GT' => 'K', 'AC' => 'M', 'CG' => 'S', 'AT' => 'W',
      'CGT' => 'B', 'AGT' => 'D', 'ACT' => 'H', 'ACG' => 'V', 'ACGT' => 'N'
    }
    new_consensus = ''
    (0...seqs[0].size).each do |i|
      # Sorted distinct letters form the lookup key, e.g. "CG" for {G, C}.
      letters_string = seqs.map { |word| word[i] }.uniq.sort.join
      consensus_letter = conslet[letters_string]
      checkerr("cannot find consensus letter for a given letter set :#{letters_string}") { consensus_letter.nil? }
      new_consensus << consensus_letter
    end
    new_consensus
  end

  # Opens a MySQL connection to +database+ using the MYSQL_HOST, MYSQL_USER
  # and MYSQL_PASSWORD constants, verifies it with "select 1" and returns it.
  def Ytilib.new_mysql_conn(database)
    my = Mysql.new(MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, database)
    checkerr("cannot connect to MySQL server") { my.query("select 1").fetch_row[0] != "1" }
    my
  end


end
145
+
146
# Log that the library was loaded (this also sets the reported program name
# to "[ytilib]"), then mix Ytilib's instance methods into the top level.
report("ytilib required, working directory #{Dir.pwd}", "ytilib")
include Ytilib
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sequence_logo/version', __FILE__)
3
+
4
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{SequenceLogo is a tool for drawing sequence logos of motifs. It gets Positional Count Matrices(PCMs) or IUPAC sequences as input and generates png-logos for a motif. Also one can create logo for reverse complement or even generate logos for a whole collection of motifs.
 Sequence logos are a graphical representation of an amino acid or nucleic acid multiple sequence alignment developed by Tom Schneider and Mike Stephens. Each logo consists of stacks of symbols, one stack for each position in the sequence. The overall height of the stack indicates the sequence conservation at that position, while the height of symbols within the stack indicates the relative frequency of each amino or nucleic acid at that position. In general, a sequence logo provides a richer and more precise description of, for example, a binding site, than would a consensus sequence (see http://weblogo.berkeley.edu/)
}
  gem.summary       = %q{Tool for drawing sequence logos of motifs}
  gem.homepage      = ""

  # Split the listing one path per line using $/ (the input record separator,
  # "\n" by default). The previous $\ is nil by default, which made split
  # break on any whitespace and mangle file names containing spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "sequence_logo"
  gem.require_paths = ["lib"]
  gem.version       = SequenceLogo::VERSION

  gem.add_dependency('rmagick', '~> 2.13.1')
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sequence_logo
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ilya Vorontsov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-31 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rmagick
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.13.1
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 2.13.1
30
+ description: ! "SequenceLogo is a tool for drawing sequence logos of motifs. It gets
31
+ Positional Count Matrices(PCMs) or IUPAC sequences as input and generates png-logos
32
+ for a motif. Also one can create logo for reverse complement or even generate logos
33
+ for a whole collection of motifs.\n Sequence logos are a graphical representation
34
+ of an amino acid or nucleic acid multiple sequence alignment developed by Tom Schneider
35
+ and Mike Stephens. Each logo consists of stacks of symbols, one stack for each position
36
+ in the sequence. The overall height of the stack indicates the sequence conservation
37
+ at that position, while the height of symbols within the stack indicates the relative
38
+ frequency of each amino or nucleic acid at that position. In general, a sequence
39
+ logo provides a richer and more precise description of, for example, a binding site,
40
+ than would a consensus sequence (see http://weblogo.berkeley.edu/)\n"
41
+ email:
42
+ - prijutme4ty@gmail.com
43
+ executables:
44
+ - create_all_logos
45
+ - generate_logo
46
+ - pmflogo
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - .gitignore
51
+ - Gemfile
52
+ - LICENSE
53
+ - README.md
54
+ - Rakefile
55
+ - bin/create_all_logos
56
+ - bin/generate_logo
57
+ - bin/pmflogo
58
+ - lib/sequence_logo.rb
59
+ - lib/sequence_logo/assets/nucl_simpa/a.png
60
+ - lib/sequence_logo/assets/nucl_simpa/c.png
61
+ - lib/sequence_logo/assets/nucl_simpa/g.png
62
+ - lib/sequence_logo/assets/nucl_simpa/t.png
63
+ - lib/sequence_logo/exec/create_all_logos.rb
64
+ - lib/sequence_logo/exec/generate_logo.rb
65
+ - lib/sequence_logo/exec/pmflogo.rb
66
+ - lib/sequence_logo/pmflogo_lib.rb
67
+ - lib/sequence_logo/version.rb
68
+ - lib/sequence_logo/ytilib.rb
69
+ - lib/sequence_logo/ytilib/addon.rb
70
+ - lib/sequence_logo/ytilib/bismark.rb
71
+ - lib/sequence_logo/ytilib/hack1.rb
72
+ - lib/sequence_logo/ytilib/infocod.rb
73
+ - lib/sequence_logo/ytilib/iupac.rb
74
+ - lib/sequence_logo/ytilib/pm.rb
75
+ - lib/sequence_logo/ytilib/pmsd.rb
76
+ - lib/sequence_logo/ytilib/randoom.rb
77
+ - lib/sequence_logo/ytilib/ytilib.rb
78
+ - sequence_logo.gemspec
79
+ homepage: ''
80
+ licenses: []
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 1.8.24
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: Tool for drawing sequence logos of motifs
103
+ test_files: []