sequence_logo 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +21 -21
- data/README.md +65 -60
- data/Rakefile +5 -5
- data/TODO.txt +8 -0
- data/bin/glue_logos +3 -0
- data/bin/sequence_logo +3 -0
- data/lib/sequence_logo.rb +7 -7
- data/lib/sequence_logo/cli.rb +37 -0
- data/lib/sequence_logo/exec/glue_logos.rb +67 -0
- data/lib/sequence_logo/exec/sequence_logo.rb +52 -0
- data/lib/sequence_logo/pmflogo_lib.rb +95 -174
- data/lib/sequence_logo/version.rb +3 -3
- data/lib/sequence_logo/ytilib.rb +10 -9
- data/lib/sequence_logo/ytilib/addon.rb +246 -246
- data/lib/sequence_logo/ytilib/bismark.rb +70 -70
- data/lib/sequence_logo/ytilib/hack1.rb +75 -75
- data/lib/sequence_logo/ytilib/pm.rb +562 -562
- data/lib/sequence_logo/ytilib/pmsd.rb +1 -1
- data/lib/sequence_logo/ytilib/ppm_support.rb +85 -0
- data/lib/sequence_logo/ytilib/randoom.rb +131 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +146 -146
- data/sequence_logo.gemspec +21 -21
- data/test/data/logo/AHR_si_direct.png +0 -0
- data/test/data/logo/AHR_si_revcomp.png +0 -0
- data/test/data/logo/AIRE_f2_direct.png +0 -0
- data/test/data/logo/AIRE_f2_revcomp.png +0 -0
- data/test/data/pcm/AHR_si.pcm +10 -0
- data/test/data/pcm/AIRE_f2.pcm +19 -0
- metadata +33 -32
- data/bin/create_all_logos +0 -3
- data/bin/generate_logo +0 -3
- data/bin/pmflogo +0 -3
- data/lib/sequence_logo/exec/create_all_logos.rb +0 -25
- data/lib/sequence_logo/exec/generate_logo.rb +0 -18
- data/lib/sequence_logo/exec/pmflogo.rb +0 -26
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ebd54ee5bf1c9fece6a4441f6c6e680c6a0af742
|
4
|
+
data.tar.gz: 7db836f3a01d63b71200496227da726cb681698e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 03ff397f94506ebc41098bb1d897ed078496d70f516e5193ef5006a3db686cd95dcea45e42c410f87f8ecbc7c74eec0015f4d10a133bd1136058d39ed7dad05c
|
7
|
+
data.tar.gz: 6bb47ee9237f020ea1de0d566642e813fc7de8e7eb87089bbac0259c26f8f5e22b192b3f2269ba0e8feba045e92bc6f5b965266547b5e2600ddb00d07655a350
|
data/.gitignore
CHANGED
@@ -1,17 +1,17 @@
|
|
1
|
-
*.gem
|
2
|
-
*.rbc
|
3
|
-
.bundle
|
4
|
-
.config
|
5
|
-
.yardoc
|
6
|
-
Gemfile.lock
|
7
|
-
InstalledFiles
|
8
|
-
_yardoc
|
9
|
-
coverage
|
10
|
-
doc/
|
11
|
-
lib/bundler/man
|
12
|
-
pkg
|
13
|
-
rdoc
|
14
|
-
spec/reports
|
15
|
-
test/tmp
|
16
|
-
test/version_tmp
|
17
|
-
tmp
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in sequence_logo.gemspec
|
4
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in sequence_logo.gemspec
|
4
|
+
gemspec
|
data/LICENSE
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
Copyright (c) 2012 Ilya Vorontsov
|
2
|
-
|
3
|
-
MIT License
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
a copy of this software and associated documentation files (the
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be
|
14
|
-
included in all copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
1
|
+
Copyright (c) 2012 Ilya Vorontsov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
22
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,61 +1,66 @@
|
|
1
|
-
# SequenceLogo
|
2
|
-
|
3
|
-
SequenceLogo is a tool for drawing sequence logos of motifs. It gets Positional Count Matrices(PCMs) at input and generates png-logos for motif. Also one can create logo for reverse complement or even generate logos for a whole collection of motifs.
|
4
|
-
Sequence logos are a graphical representation of an amino acid or nucleic acid multiple sequence alignment developed by Tom Schneider and Mike Stephens. Each logo consists of stacks of symbols, one stack for each position in the sequence. The overall height of the stack indicates the sequence conservation at that position, while the height of symbols within the stack indicates the relative frequency of each amino or nucleic acid at that position. In general, a sequence logo provides a richer and more precise description of, for example, a binding site, than would a consensus sequence (see http://weblogo.berkeley.edu/)
|
5
|
-
|
6
|
-
|
7
|
-
## Installation
|
8
|
-
|
9
|
-
Add this line to your application's Gemfile:
|
10
|
-
|
11
|
-
gem 'sequence_logo'
|
12
|
-
|
13
|
-
And then execute:
|
14
|
-
|
15
|
-
$ bundle
|
16
|
-
|
17
|
-
Or install it yourself as:
|
18
|
-
|
19
|
-
$ gem install sequence_logo
|
20
|
-
|
21
|
-
## Usage
|
22
|
-
|
23
|
-
SequenceLogo consists of
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
*
|
32
|
-
*
|
33
|
-
|
34
|
-
|
35
|
-
*
|
36
|
-
*
|
37
|
-
*
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
1
|
+
# SequenceLogo
|
2
|
+
|
3
|
+
SequenceLogo is a tool for drawing sequence logos of motifs. It takes Positional Count Matrices (PCMs) as input and generates PNG logos for the motifs. One can also create a logo for the reverse complement, or even generate logos for a whole collection of motifs.
|
4
|
+
Sequence logos are a graphical representation of an amino acid or nucleic acid multiple sequence alignment developed by Tom Schneider and Mike Stephens. Each logo consists of stacks of symbols, one stack for each position in the sequence. The overall height of the stack indicates the sequence conservation at that position, while the height of symbols within the stack indicates the relative frequency of each amino or nucleic acid at that position. In general, a sequence logo provides a richer and more precise description of, for example, a binding site, than would a consensus sequence (see http://weblogo.berkeley.edu/)
|
5
|
+
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'sequence_logo'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install sequence_logo
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
SequenceLogo consists of two tools:
|
24
|
+
|
25
|
+
sequence_logo [options] <input files>...
|
26
|
+
|
27
|
+
An *input_file* can be in PCM format (file extension should be .pat or .pcm), in FASTA format (file extensions: .mfa, .fasta, .plain), in SMall BiSMark format (.xml), or in IUPAC format (any other extension). In future releases, formats other than PCM and PPM will be removed in favor of a Unix-like modular style.
|
28
|
+
|
29
|
+
Optional parameters:
|
30
|
+
|
31
|
+
* --x-unit SIZE - width of a single letter
|
32
|
+
* --y-unit SIZE - base height of a letter
|
33
|
+
* --words-count WEIGHT - a floating-point number that represents the alignment weight. If the words count is not specified, it is obtained from the input when the input file is a PCM. If the input file is a PPM, the words count can't be obtained; in that case a discrete logo can't be drawn, and a weblogo is drawn instead.
|
34
|
+
* --icd-mode <weblogo|discrete> - information content mode
|
35
|
+
* --orientation <direct|revcomp|both> - create a logo for the direct orientation, the reverse-complement orientation, or both orientations of the motif
|
36
|
+
* --scheme FOLDER - name of folder containing nucleotide images
|
37
|
+
* --threshold-lines - draw lines on specific levels
|
38
|
+
|
39
|
+
* Tool **glue_logos** generates a single image of aligned motifs.
|
40
|
+
|
41
|
+
`glue_logos <output file> <file with alignment infos>`
|
42
|
+
|
43
|
+
or
|
44
|
+
|
45
|
+
`<alignment infos> | glue_logos <output file>`
|
46
|
+
|
47
|
+
Input data comes either from a file with alignment infos or from stdin. *glue_logos* is designed to work with the macroape *align_motifs* tool, and its input format is the same as the output format of *align_motifs*:
|
48
|
+
pcm_file_1 shift_1 orientation_1
|
49
|
+
pcm_file_2 shift_2 orientation_2
|
50
|
+
pcm_file_3 shift_3 orientation_3
|
51
|
+
|
52
|
+
So it's simple to run
|
53
|
+
|
54
|
+
align_motifs --pcm leader.pcm other_motifs_1.pcm other_motifs_2.pcm | glue_logos cluster.png
|
55
|
+
|
56
|
+
Don't forget to specify PCM files instead of PWM files and to use the `--pcm` option of align_motifs.
|
57
|
+
|
58
|
+
## Contributing
|
59
|
+
|
60
|
+
1. Fork it
|
61
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
62
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
63
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
64
|
+
5. Create new Pull Request
|
65
|
+
|
61
66
|
Copyright (c) 2011-2012 Ivan Kulakovskiy(author), Ilya Vorontsov(refactoring and gemification)
|
data/Rakefile
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#!/usr/bin/env rake
|
2
|
-
require 'bundler/gem_tasks'
|
3
|
-
|
4
|
-
# require 'rspec/core/rake_task'
|
5
|
-
# RSpec::Core::RakeTask.new
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require 'bundler/gem_tasks'
|
3
|
+
|
4
|
+
# require 'rspec/core/rake_task'
|
5
|
+
# RSpec::Core::RakeTask.new
|
data/TODO.txt
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
AssetsPath shouldn't be used at the draw_logo level, because that makes it impossible to switch the scheme to a folder that is outside the gem
|
2
|
+
And decide how to keep it possible to use schemes inside the assets path
|
3
|
+
|
4
|
+
Make use of Tempfile in glue_logos.
|
5
|
+
|
6
|
+
Make tests
|
7
|
+
|
8
|
+
Wrap execs into methods
|
data/bin/glue_logos
ADDED
data/bin/sequence_logo
ADDED
data/lib/sequence_logo.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
end
|
1
|
+
require_relative 'sequence_logo/version'
|
2
|
+
require_relative 'sequence_logo/pmflogo_lib'
|
3
|
+
require_relative 'sequence_logo/cli'
|
4
|
+
|
5
|
+
module SequenceLogo
|
6
|
+
AssetsPath = File.join(File.dirname(__FILE__), 'sequence_logo', 'assets')
|
7
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'optparse'
|
3
|
+
module SequenceLogo
|
4
|
+
class CLI
|
5
|
+
attr_reader :options
|
6
|
+
def initialize(options = {})
|
7
|
+
@options = options.dup
|
8
|
+
end
|
9
|
+
def parse_options!(argv)
|
10
|
+
parser.parse!(argv)
|
11
|
+
options
|
12
|
+
end
|
13
|
+
def parser
|
14
|
+
@parser ||= OptionParser.new do |opts|
|
15
|
+
opts.on('-x', '--x-unit X_UNIT', 'Single letter width') do |v|
|
16
|
+
options[:x_unit] = v.to_i
|
17
|
+
end
|
18
|
+
opts.on('-y', '--y-unit Y_UNIT', 'Base letter height') do |v|
|
19
|
+
options[:y_unit] = v.to_i
|
20
|
+
end
|
21
|
+
opts.on('--words-count WEIGHT', 'Define alignment weight') do |v|
|
22
|
+
options[:words_count] = v.to_f
|
23
|
+
end
|
24
|
+
opts.on('--icd-mode MODE', 'Calculation mode: discrete or weblogo', 'Weblogo is assumed if word count not given') do |v|
|
25
|
+
options[:icd_mode] = v.to_sym
|
26
|
+
raise ArgumentError, 'icd-mode can be either discrete or weblogo' unless [:discrete, :weblogo].include?(options[:icd_mode])
|
27
|
+
end
|
28
|
+
opts.on('--[no-]threshold-lines', 'Draw threshold lines') do |v|
|
29
|
+
options[:threshold_lines] = v
|
30
|
+
end
|
31
|
+
opts.on('--scheme SCHEME', 'Specify folder with nucleotide images') do |v|
|
32
|
+
options[:scheme] = v
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require_relative '../../sequence_logo'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
begin
|
5
|
+
doc = <<-EOS
|
6
|
+
Usage:
|
7
|
+
glue_logos <output file> <alignment infos file>
|
8
|
+
or
|
9
|
+
<alignment infos file> | glue_logos <output file>
|
10
|
+
|
11
|
+
Alignment infos has format:
|
12
|
+
pcm_file_1 shift_1 orientation_1
|
13
|
+
pcm_file_2 shift_2 orientation_2
|
14
|
+
pcm_file_3 shift_3 orientation_3
|
15
|
+
EOS
|
16
|
+
|
17
|
+
argv = ARGV
|
18
|
+
default_options = {x_unit: 30, y_unit: 60, words_count: nil, icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa', logo_shift: 300, text_size_pt: 24}
|
19
|
+
cli = SequenceLogo::CLI.new(default_options)
|
20
|
+
cli.instance_eval do
|
21
|
+
parser.banner = doc
|
22
|
+
parser.on_head('--logo-shift SHIFT', 'Width of region for labels') do |v|
|
23
|
+
options[:logo_shift] = v.to_i
|
24
|
+
end
|
25
|
+
parser.on_head('--text-size SIZE', 'Text size in points') do |v|
|
26
|
+
options[:text_size] = v.to_f
|
27
|
+
end
|
28
|
+
end
|
29
|
+
options = cli.parse_options!(argv)
|
30
|
+
|
31
|
+
output_file = argv.shift
|
32
|
+
raise ArgumentError, 'Specify output file' unless output_file
|
33
|
+
|
34
|
+
raise 'You can specify alignment infos either from file or from stdin. Don\'t use both sources simultaneously' if !ARGV.empty? && !$stdin.tty?
|
35
|
+
if !ARGV.empty?
|
36
|
+
alignment_infos = File.readlines(ARGV.shift)
|
37
|
+
elsif !$stdin.tty?
|
38
|
+
alignment_infos = $stdin.readlines
|
39
|
+
else
|
40
|
+
raise ArgumentError, 'Specify alignment infos'
|
41
|
+
end
|
42
|
+
|
43
|
+
logos = {}
|
44
|
+
logo_filenames = []
|
45
|
+
alignment_infos.each do |line|
|
46
|
+
filename, shift, orientation = line.strip.split("\t")
|
47
|
+
ppm = get_ppm_from_file(filename)
|
48
|
+
checkerr("bad input file: #{filename}") { ppm == nil }
|
49
|
+
shift = shift.to_i
|
50
|
+
logo_filename = "#{filename}_temp.png"
|
51
|
+
logo_filenames << logo_filename
|
52
|
+
case orientation
|
53
|
+
when 'direct'
|
54
|
+
SequenceLogo.draw_logo(ppm, options).write(logo_filename)
|
55
|
+
when 'revcomp'
|
56
|
+
SequenceLogo.draw_logo(ppm.revcomp, options).write(logo_filename)
|
57
|
+
else
|
58
|
+
raise "Unknown orientation #{orientation} for #{filename}"
|
59
|
+
end
|
60
|
+
logos[logo_filename] = {shift: shift, length: ppm.length, name: File.basename(filename, File.extname(filename))}
|
61
|
+
end
|
62
|
+
|
63
|
+
SequenceLogo.glue_files(logos, output_file, options)
|
64
|
+
logo_filenames.each{|filename| File.delete(filename) }
|
65
|
+
rescue => err
|
66
|
+
$stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
|
67
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require_relative '../../sequence_logo'
|
2
|
+
require 'shellwords'
|
3
|
+
|
4
|
+
begin
|
5
|
+
doc = <<-EOS
|
6
|
+
sequence_logo is a tool for drawing motif logos. It is able to process PCM files either as a position matrix (*.pat or *.pcm), or in FASTA format (file extensions: .mfa, .fasta, .plain), or in SMall BiSMark format (.xml), or in IUPAC format (any other extension).
|
7
|
+
Usage:
|
8
|
+
sequence_logo [options] <pcm/ppm file>...
|
9
|
+
or
|
10
|
+
ls pcm_folder/*.pcm | sequence_logo [options]
|
11
|
+
EOS
|
12
|
+
|
13
|
+
argv = ARGV
|
14
|
+
default_options = {x_unit: 30, y_unit: 60, words_count: nil, orientation: :both, logo_folder: '.', icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa'}
|
15
|
+
cli = SequenceLogo::CLI.new(default_options)
|
16
|
+
cli.instance_eval do
|
17
|
+
parser.banner = doc
|
18
|
+
parser.on_head('--logo-folder FOLDER', 'Folder to store generated logos') do |v|
|
19
|
+
options[:logo_folder] = v
|
20
|
+
end
|
21
|
+
parser.on_head('--orientation ORIENTATION', 'Which logo to draw: direct/revcomp/both') do |v|
|
22
|
+
v = v.to_sym
|
23
|
+
raise ArgumentError, 'Orientation can be either direct or revcomp or both' unless [:direct, :revcomp, :both].include?(v)
|
24
|
+
options[:orientation] = v
|
25
|
+
end
|
26
|
+
end
|
27
|
+
options = cli.parse_options!(argv)
|
28
|
+
|
29
|
+
logo_folder = options[:logo_folder]
|
30
|
+
Dir.mkdir(logo_folder) unless Dir.exist?(logo_folder)
|
31
|
+
|
32
|
+
filenames = argv
|
33
|
+
filenames += $stdin.read.shellsplit unless $stdin.tty?
|
34
|
+
raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
|
35
|
+
|
36
|
+
filenames.each do |filename|
|
37
|
+
ppm = get_ppm_from_file(filename)
|
38
|
+
checkerr("bad input file: #{filename}") { ppm == nil }
|
39
|
+
|
40
|
+
filename_wo_ext = File.basename(filename, File.extname(filename))
|
41
|
+
if [:direct, :both].include?(options[:orientation])
|
42
|
+
direct_output = File.join(logo_folder, "#{filename_wo_ext}_direct.png")
|
43
|
+
SequenceLogo.draw_logo(ppm, options).write(direct_output)
|
44
|
+
end
|
45
|
+
if [:revcomp, :both].include?(options[:orientation])
|
46
|
+
revcomp_output = File.join(logo_folder, "#{filename_wo_ext}_revcomp.png")
|
47
|
+
SequenceLogo.draw_logo(ppm.revcomp, options).write(revcomp_output)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
rescue => err
|
51
|
+
$stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
|
52
|
+
end
|
@@ -1,193 +1,114 @@
|
|
1
|
-
|
1
|
+
require_relative 'ytilib'
|
2
2
|
require 'RMagick'
|
3
3
|
|
4
|
-
|
5
|
-
def
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
module SequenceLogo
|
5
|
+
def self.draw_threshold_lines(i_logo, ppm)
|
6
|
+
x_size = i_logo.columns
|
7
|
+
y_size = i_logo.rows
|
8
|
+
|
9
|
+
line2of4 = y_size - ppm.get_line(ppm.icd2of4) * y_size
|
10
|
+
lineThc = y_size - ppm.get_line(ppm.icdThc) * y_size
|
11
|
+
lineTlc = y_size - ppm.get_line(ppm.icdTlc) * y_size
|
12
|
+
|
13
|
+
dr = Magick::Draw.new
|
14
|
+
dr.fill('transparent')
|
15
|
+
|
16
|
+
dr.stroke_width(y_size / 200.0)
|
17
|
+
dr.stroke_dasharray(7,7)
|
18
|
+
|
19
|
+
dr.stroke('silver')
|
20
|
+
dr.line(0, line2of4, x_size, line2of4)
|
21
|
+
dr.line(0, lineThc, x_size, lineThc)
|
22
|
+
dr.line(0, lineTlc, x_size, lineTlc)
|
23
|
+
|
24
|
+
dr.draw(i_logo)
|
11
25
|
end
|
12
|
-
|
13
|
-
def
|
14
|
-
|
15
|
-
|
26
|
+
|
27
|
+
def self.create_canvas(ppm, options)
|
28
|
+
x_size = options[:x_unit] * ppm.length
|
29
|
+
y_size = options[:y_unit]
|
30
|
+
|
31
|
+
i_logo = Magick::ImageList.new
|
32
|
+
if options[:icd_mode] == :discrete
|
33
|
+
i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'white'))
|
34
|
+
draw_threshold_lines(i_logo, ppm) if options[:threshold_lines]
|
16
35
|
else
|
17
|
-
|
36
|
+
i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'bisque'))
|
18
37
|
end
|
38
|
+
|
39
|
+
i_logo
|
19
40
|
end
|
20
41
|
|
42
|
+
def self.letter_images(scheme_dir)
|
43
|
+
if File.exist?(File.join(scheme_dir,'a.png'))
|
44
|
+
extension = 'png'
|
45
|
+
elsif File.exist?(File.join(scheme_dir,'a.gif'))
|
46
|
+
extension = 'gif'
|
47
|
+
else
|
48
|
+
raise "Scheme not exists in folder #{scheme_dir}"
|
49
|
+
end
|
21
50
|
|
22
|
-
|
23
|
-
|
24
|
-
@matrix['A'].each_index { |i|
|
25
|
-
rseq << 2 + ['A','C','G','T'].inject(0) { |sum, l|
|
26
|
-
pn = @matrix[l][i]
|
27
|
-
sum += (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
|
28
|
-
}
|
29
|
-
}
|
30
|
-
|
31
|
-
mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
|
32
|
-
@matrix['A'].each_index { |i|
|
33
|
-
['A','C','G','T'].each { |l|
|
34
|
-
mat[l][i]= @matrix[l][i] * rseq[i] / 2 # so we can handle a '2 bit' scale here
|
35
|
-
}
|
36
|
-
}
|
37
|
-
|
38
|
-
mat
|
51
|
+
letter_files = %w[a c g t].collect{|letter| File.join(scheme_dir, "#{letter}.#{extension}") }
|
52
|
+
Magick::ImageList.new(*letter_files)
|
39
53
|
end
|
40
54
|
|
41
|
-
def
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
55
|
+
def self.draw_letters_on_canvas(i_logo, i_letters, ppm, options)
|
56
|
+
y_unit = options[:y_unit]
|
57
|
+
x_unit = options[:x_unit]
|
58
|
+
matrix = ppm.get_logo(options[:icd_mode])
|
59
|
+
matrix['A'].each_index { |i|
|
60
|
+
y_pos = 0
|
61
|
+
sorted_letters = ['A', 'C', 'G', 'T'].collect { |letter| {:score => matrix[letter][i], :letter => letter} }.sort_by { |pair| pair[:score] }.collect { |pair| pair[:letter] }.reverse
|
62
|
+
sorted_letters.each { |letter|
|
63
|
+
next if y_unit * matrix[letter][i] <= 1
|
64
|
+
letter_index = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}[letter]
|
65
|
+
y_block = (y_unit * matrix[letter][i]).round
|
66
|
+
i_logo << i_letters[letter_index].dup.resize(x_unit, y_block)
|
67
|
+
y_pos += y_block
|
68
|
+
i_logo.cur_image.page = Magick::Rectangle.new(0, 0, i * x_unit, y_unit - y_pos )
|
53
69
|
}
|
54
70
|
}
|
55
|
-
|
56
|
-
mat
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def get_ppm_from_file(in_file_name, words_count)
|
61
|
-
case File.ext_wo_name(in_file_name)
|
62
|
-
when 'pat', 'pcm'
|
63
|
-
pm = PM.load(in_file_name)
|
64
|
-
pm.fixwc if pm.words_count
|
65
|
-
when 'mfa', 'fasta', 'plain'
|
66
|
-
pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
|
67
|
-
when 'xml'
|
68
|
-
pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
|
69
|
-
when in_file_name
|
70
|
-
pm = PPM.from_IUPAC(in_file_name.upcase)
|
71
71
|
end
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
i_logo = Magick::ImageList.new
|
80
|
-
if paper_mode
|
81
|
-
i_logo.new_image(x_size, y_size)
|
82
|
-
else
|
83
|
-
if icd_mode == :discrete
|
84
|
-
i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'white'))
|
85
|
-
if threshold_lines
|
86
|
-
dr = Magick::Draw.new
|
87
|
-
dr.fill('transparent')
|
88
|
-
|
89
|
-
dr.stroke_width(y_size / 200.0)
|
90
|
-
dr.stroke_dasharray(7,7)
|
91
|
-
|
92
|
-
line2of4 = y_size - pm.get_line(pm.icd2of4) * y_size
|
93
|
-
lineThc = y_size - pm.get_line(pm.icdThc) * y_size
|
94
|
-
lineTlc = y_size - pm.get_line(pm.icdTlc) * y_size
|
95
|
-
|
96
|
-
dr.stroke('silver')
|
97
|
-
dr.line(0, line2of4, x_size, line2of4)
|
98
|
-
dr.line(0, lineThc, x_size, lineThc)
|
99
|
-
dr.line(0, lineTlc, x_size, lineTlc)
|
100
|
-
|
101
|
-
dr.draw(i_logo)
|
102
|
-
end
|
103
|
-
else
|
104
|
-
i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'bisque'))
|
72
|
+
|
73
|
+
def self.draw_logo(ppm, options = {})
|
74
|
+
ppm.words_count = options[:words_count] if options[:words_count]
|
75
|
+
unless ppm.words_count
|
76
|
+
report "words count for PPM is undefined, assuming weblogo mode"
|
77
|
+
options[:icd_mode] = :weblogo
|
105
78
|
end
|
79
|
+
i_logo = create_canvas(ppm, options)
|
80
|
+
scheme_dir = File.join(AssetsPath, options[:scheme])
|
81
|
+
draw_letters_on_canvas(i_logo, letter_images(scheme_dir), ppm, options)
|
82
|
+
i_logo = i_logo.flatten_images
|
106
83
|
end
|
107
|
-
i_logo
|
108
|
-
end
|
109
|
-
|
110
|
-
def letter_images(scheme_dir)
|
111
|
-
if File.exist?(File.join(scheme_dir,'a.png'))
|
112
|
-
lp = {'A' => File.join(scheme_dir,'a.png'), 'C' => File.join(scheme_dir,'c.png'), 'G' => File.join(scheme_dir,'g.png'), 'T' => File.join(scheme_dir,'t.png')}
|
113
|
-
elsif File.exist?(File.join(scheme_dir,'a.gif'))
|
114
|
-
lp = {'A' => File.join(scheme_dir,'a.gif'), 'C' => File.join(scheme_dir,'c.gif'), 'G' => File.join(scheme_dir,'g.gif'), 'T' => File.join(scheme_dir,'t.gif')}
|
115
|
-
else
|
116
|
-
raise "Scheme not exists in folder #{scheme_dir}"
|
117
|
-
end
|
118
|
-
i_letters = Magick::ImageList.new(lp['A'], lp['C'], lp['G'], lp['T'])
|
119
|
-
end
|
120
|
-
|
121
|
-
def draw_letters_on_canvas(i_logo, i_letters, matrix, y_size, x_unit)
|
122
|
-
matrix['A'].each_index { |i|
|
123
|
-
y_pos = 0
|
124
|
-
sorted_letters = ['A', 'C', 'G', 'T'].collect { |letter| {:score => matrix[letter][i], :letter => letter} }.sort_by { |pair| pair[:score] }.collect { |pair| pair[:letter] }.reverse
|
125
|
-
sorted_letters.each { |letter|
|
126
|
-
next if y_size * matrix[letter][i] <= 1
|
127
|
-
letter_index = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}[letter]
|
128
|
-
y_block = (y_size * matrix[letter][i]).round
|
129
|
-
i_logo << i_letters[letter_index].dup.resize(x_unit, y_block)
|
130
|
-
y_pos += y_block
|
131
|
-
i_logo.cur_image.page = Magick::Rectangle.new(0, 0, i * x_unit, y_size - y_pos )
|
132
|
-
}
|
133
|
-
}
|
134
|
-
end
|
135
|
-
|
136
|
-
|
137
|
-
def draw_logo(in_file_name, out_file_name, options = {})
|
138
|
-
default_options = { words_count: nil,
|
139
|
-
x_unit: 100,
|
140
|
-
y_size: 200,
|
141
|
-
icd_mode: 'discrete',
|
142
|
-
revcomp: false,
|
143
|
-
scheme: 'nucl_simpa',
|
144
|
-
paper_mode: false,
|
145
|
-
threshold_lines: true }
|
146
|
-
|
147
|
-
options = options.reject{|k,v| v == 'default' || v == :default}
|
148
|
-
options = default_options.merge( options )
|
149
|
-
|
150
|
-
x_unit = options[:x_unit].to_i
|
151
|
-
y_size = options[:y_size].to_i
|
152
|
-
icd_mode = options[:icd_mode].to_sym
|
153
|
-
scheme = options[:scheme]
|
154
|
-
|
155
|
-
words_count = options[:words_count]
|
156
|
-
words_count = words_count.to_f if words_count
|
157
|
-
|
158
|
-
revcomp = options[:revcomp]
|
159
|
-
revcomp = false if revcomp == 'no' || revcomp == 'false' || revcomp == 'direct'
|
160
|
-
|
161
|
-
paper_mode = options[:paper_mode]
|
162
|
-
paper_mode = false if paper_mode == 'no' || paper_mode == 'false'
|
163
|
-
|
164
|
-
threshold_lines = options[:threshold_lines]
|
165
|
-
threshold_lines = false if threshold_lines == 'no' || threshold_lines == 'false'
|
166
|
-
|
167
|
-
########################
|
168
|
-
|
169
|
-
pm = get_ppm_from_file(in_file_name, words_count)
|
170
|
-
checkerr("bad input file") { pm == nil }
|
171
|
-
|
172
|
-
x_size = x_unit * pm.length
|
173
|
-
|
174
|
-
|
175
|
-
unless pm.words_count
|
176
|
-
report "words count for PM is undefined, assuming weblogo mode"
|
177
|
-
icd_mode = :weblogo
|
178
|
-
end
|
179
|
-
|
180
|
-
i_logo = create_canvas(x_size, y_size, icd_mode, paper_mode, threshold_lines, pm)
|
181
|
-
|
182
|
-
pm.revcomp! if revcomp
|
183
|
-
matrix = pm.get_logo(icd_mode)
|
184
84
|
|
185
|
-
|
186
|
-
|
187
|
-
|
85
|
+
# logos = { filename => {shift: ..., length: ..., name: ...} }
|
86
|
+
def self.glue_files(logos, output_file, options)
|
87
|
+
logo_shift = options[:logo_shift] || 300
|
88
|
+
x_unit = options[:x_unit] || 30
|
89
|
+
y_unit = options[:y_unit] || 60
|
90
|
+
text_size = options[:text_size] || 24
|
91
|
+
|
92
|
+
leftmost_shift = logos.map{|file,infos| infos[:shift] }.min
|
93
|
+
logos.each{|file, infos| infos[:shift] -= leftmost_shift}
|
94
|
+
full_alignment_size = logos.map{|file,infos| infos[:length] + infos[:shift] }.max
|
95
|
+
|
96
|
+
x_size = logo_shift + full_alignment_size * x_unit
|
97
|
+
y_size = logos.size * y_unit
|
98
|
+
command_string = "convert -size #{ x_size }x#{ y_size } -pointsize #{text_size} xc:white "
|
99
|
+
logos.each_with_index do |(logo_filename,infos), idx|
|
100
|
+
logo_x_start = logo_shift + infos[:shift] * x_unit
|
101
|
+
logo_y_start = y_unit * idx
|
102
|
+
command_string << "\"#{ logo_filename }\" -geometry +#{ logo_x_start }+#{ logo_y_start } -composite "
|
103
|
+
end
|
188
104
|
|
189
|
-
|
190
|
-
|
105
|
+
command_draw_names = ""
|
106
|
+
logos.each_with_index do |(logo_filename,infos), idx|
|
107
|
+
text_x_start = 10
|
108
|
+
text_y_start = y_unit * (idx + 0.5)
|
109
|
+
command_draw_names << "-draw \"text #{ text_x_start },#{ text_y_start } '#{infos[:name]}'\" "
|
110
|
+
end
|
191
111
|
|
192
|
-
|
112
|
+
system(command_string + command_draw_names + "\"#{output_file}\"")
|
113
|
+
end
|
193
114
|
end
|