sequence_logo 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 885b0f25932abe6a453fcbfb48eaae9e5227383a
4
- data.tar.gz: fc7ca0ff71c311e8d7301d086ed3b4d153f0493f
3
+ metadata.gz: 4ed1b2f635b68cfa6d69d47f1e122c81f917e286
4
+ data.tar.gz: 493c1d3b807804a5f9ee670df8d7fbc5f6fe883f
5
5
  SHA512:
6
- metadata.gz: c47be1cb8060cd2ed305d4d21862b86db24d6b24b7b5989fbe86a4fb245c2b8db6a07595a2719c922d0a32bdd10d59f8e916c87c23610911c1a18ca547148ce8
7
- data.tar.gz: cfc2df9b0029a19f17070a9937e6a5c61ccebee6ff0c1e65dac5da573c8e5e23de23b23ae240f00dcfc06b525f8e748c35ebfdd006765e2cd608e820450dc72f
6
+ metadata.gz: b6f56777b12c9dd26c0f3523f3762246baa42a070b045126fabb1b3250b9bba5baa4ff8685a80d06c2b8fabdd095a0c5512a3c0c7096016dd65f1983b24f12e8
7
+ data.tar.gz: d8bed3b3cab1287681a440b020166f8a5012f02ffb61bfb87ddb7437d23b07298c85cd68fc8a0713020ec8470362d7ea0ae4ad2738140553dfc595f7e5d00752
data/README.md CHANGED
@@ -38,16 +38,17 @@ SequenceLogo consists of two tools:
38
38
 
39
39
  * Tool **glue_logos** generates a single image of aligned motifs.
40
40
 
41
- `glue_logos <output file> <file with alignment infos>`
42
-
41
+ ```glue_logos <output file> <file with alignment infos>```
42
+
43
43
  or
44
44
 
45
- `<alignment infos> | glue_logos <output file>`
45
+ <alignment infos> | glue_logos <output file>
46
+
47
+ Input data comes either from a file with alignments or from stdin. *glue_logos* is designed to work with the macroape *align_motifs* tool: its input format is the same as the output format of *align_motifs*:
46
48
 
47
- Input data comes either from file with alignments or from stdin. *glue_logos* is designated to work fine with macroape *align_motifs* tool and has input format the same as output format of *align_motifs* tool:
48
- pcm_file_1 shift_1 orientation_1
49
- pcm_file_2 shift_2 orientation_2
50
- pcm_file_3 shift_3 orientation_3
49
+ pcm_file_1 shift_1 orientation_1 [motif_name_1]
50
+ pcm_file_2 shift_2 orientation_2 [motif_name_2]
51
+ pcm_file_3 shift_3 orientation_3 [motif_name_3]
51
52
 
52
53
  So it's simple to run
53
54
 
@@ -0,0 +1,70 @@
1
+ require_relative 'canvases'
2
+
3
module SequenceLogo
  # An ordered list of alignable objects, each placed at its own horizontal
  # shift. Rendering stacks the items vertically, one row per item.
  class Alignment
    # One aligned object together with its shift.
    # The wrapped object should respond to #name, #length, #render and #revcomp.
    class Item
      attr_reader :object, :shift

      # object - the thing to be drawn
      # shift  - horizontal offset of the object (multiplied by the canvas
      #          factory's x_unit when rendered)
      def initialize(object, shift)
        @object = object
        @shift = shift
      end

      def length
        object.length
      end

      def name
        object.name
      end

      # Renders the object, pads it on the left according to the shift and
      # glues the name label in front of it.
      def render(canvas_factory)
        object_image = object.render(canvas_factory)
        shifted_image = canvas_factory.shifted_logo(object_image, shift)
        canvas_factory.logo_with_name(shifted_image, name)
      end
    end

    ####################

    def initialize(items = [])
      @alignable_items = items
    end

    # Returns a new alignment with +item+ appended; the receiver is not modified.
    def +(item)
      Alignment.new(@alignable_items + [item])
    end

    # Returns a new alignment in which every object is reverse-complemented
    # and the shifts are mirrored around the rightmost edge of the alignment.
    def revcomp
      # the right edge does not depend on the current item, so compute it once
      right_edge = rightmost_position
      items_reversed = @alignable_items.map do |item|
        Item.new(item.object.revcomp, right_edge - item.shift - item.length)
      end
      Alignment.new(items_reversed)
    end

    # Renders every item (shifted so that the minimal shift is zero) and
    # glues the rows one under another on a white background.
    def render(canvas_factory)
      canvas = VerticalGluingCanvas.new
      items_normalized.each do |item|
        canvas.add_image item.render(canvas_factory)
      end
      canvas.background(Magick::HatchFill.new('white', 'white'))
      canvas.image
    end

    private

    # return list of items shifted altogether such that minimal shift is zero
    def items_normalized
      # the minimal shift is the same for all items, so compute it once
      offset = leftmost_shift
      @alignable_items.map { |item| Item.new(item.object, item.shift - offset) }
    end

    # Minimal shift among the items (nil for an empty alignment).
    def leftmost_shift
      @alignable_items.map(&:shift).min
    end

    # Maximal right edge (shift + length) among the items (nil for an empty alignment).
    def rightmost_position
      @alignable_items.map { |item| item.shift + item.length }.max
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require_relative 'magick_support'
2
+ require_relative 'canvases'
3
+
4
module SequenceLogo
  # Produces the image pieces that logos are assembled from: scaled letters,
  # single logo positions, text labels and shifted/named logo rows.
  class CanvasFactory
    # Index of each nucleotide in the letter_images list
    LETTER_INDICES = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}.freeze

    attr_reader :x_unit, :y_unit, :text_size, :logo_shift
    attr_reader :letter_images

    # letter_images - indexable list of images for A, C, G, T (in this order)
    # options       - :x_unit / :y_unit (size of one logo position),
    #                 :text_size (point size of labels),
    #                 :logo_shift (width reserved for the label)
    def initialize(letter_images, options = {})
      @letter_images = letter_images
      @logo_shift = options[:logo_shift] || 300
      @x_unit = options[:x_unit] || 30
      @y_unit = options[:y_unit] || 60
      @text_size = options[:text_size] || 24
    end

    # Transparent image logo_shift wide and img_height high with +text+ drawn on it.
    def text_image(text, img_height = y_unit)
      text_img = Magick::Image.new(logo_shift, img_height){ self.background_color = 'transparent' }
      annotation = Magick::Draw.new
      annotation.pointsize(text_size)
      annotation.text(10, img_height / 2, text)
      annotation.draw(text_img)
      text_img
    end

    # Image with a transparent spacer (shift * x_unit wide) glued to the left of +image+.
    def shifted_logo(image, shift)
      canvas = HorizontalGluingCanvas.new
      canvas.add_image Magick::Image.new(shift * x_unit, image.rows){ self.background_color = 'transparent' }
      canvas.add_image image
      canvas.image
    end

    # Image with a text label for +name+ glued to the left of +image+.
    def logo_with_name(image, name)
      canvas = HorizontalGluingCanvas.new
      canvas.add_image text_image(name, image.rows)
      canvas.add_image image
      canvas.image
    end

    # New empty logo canvas bound to this factory.
    # LogoCanvas#initialize takes the canvas factory itself (it asks the factory
    # for letters and positions), not letter images with size options.
    def logo_canvas
      LogoCanvas.new(self)
    end

    # Takes an enumerable with relative (0 to 1) heights of letters and draws them scaled appropriately
    def logo_for_ordered_letters(letters_with_heights)
      logo_for_ordered_letters_nonscaling(rescale_letters(letters_with_heights))
    end

    # Takes an enumerable with [height, letter] pairs and draws a logo position with letters
    # in order of enumeration, the first letter at the bottom.
    # It's a basic logo-block.
    def logo_for_ordered_letters_nonscaling(letters_with_heights)
      y_pos = 0
      position_logo = Magick::ImageList.new
      position_logo.set_minimal_size(x_unit, y_unit)
      letters_with_heights.each do |height, letter|
        y_pos += height
        position_logo.put_image_at(letter_image(letter, x_unit, height), 0, y_unit - y_pos)
      end
      position_logo.flatten_images
    end

    # Converts relative heights to heights in y_unit scale, dropping letters
    # whose scaled height does not exceed 1.
    def rescale_letters(letters_with_heights)
      letters_with_heights
        .reject { |part_of_height, _letter| y_unit * part_of_height <= 1 }
        .map { |part_of_height, letter| [y_unit * part_of_height, letter] }
    end
    private :logo_for_ordered_letters_nonscaling, :rescale_letters

    # Copy of the image of +letter+ resized to the given size.
    # +letter+ is either a numeric index into letter_images or a nucleotide letter.
    # Raises ArgumentError for a letter that is not one of A, C, G, T.
    def letter_image(letter, x_size = x_unit, y_size = y_unit)
      index = letter.is_a?(Numeric) ? letter : letter_index(letter)
      raise ArgumentError, "Unknown letter #{letter.inspect}" unless index
      letter_images[index].dup.resize(x_size, y_size)
    end

    # Index of a nucleotide letter (case-insensitive); nil for an unknown letter.
    def letter_index(letter)
      LETTER_INDICES[letter.to_s.upcase]
    end

    # Loads letter images a/c/g/t from +scheme_dir+; png files are preferred over gif.
    def self.letter_images(scheme_dir)
      extension = %w[png gif].find { |ext| File.exist?(File.join(scheme_dir, "a.#{ext}")) }
      raise "Scheme not exists in folder #{scheme_dir}" unless extension

      letter_files = %w[a c g t].map { |letter| File.join(scheme_dir, "#{letter}.#{extension}") }
      Magick::ImageList.new(*letter_files)
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require_relative 'canvases/gluing_canvas'
2
+ require_relative 'canvases/vertical_gluing_canvas'
3
+ require_relative 'canvases/horizontal_gluing_canvas'
4
+ require_relative 'canvases/logo_canvas'
@@ -0,0 +1,35 @@
1
+ require_relative '../magick_support'
2
+
3
module SequenceLogo
  # Base class of canvases that glue images together.
  # Subclasses are expected to provide #x_size and #y_size and to place
  # the image in their own #add_image (calling super to keep the counter).
  class GluingCanvas
    attr_reader :i_logo, :size

    def initialize
      @i_logo = Magick::ImageList.new
      @size = 0
      # callbacks are run, in order, right before the image list is flattened
      @rendering_callbacks = [method(:render_background)]
    end

    # Runs the deferred rendering callbacks and returns the flattened image.
    def image
      @rendering_callbacks.each { |callback| callback.call }
      @i_logo.flatten_images
    end

    # Remembers the fill to be used for the background layer.
    def background(fill)
      @background_fill = fill
    end

    # Puts a bottom layer of full canvas size: filled when a background
    # was requested, otherwise the minimal-size layer of the image list.
    def render_background
      return @i_logo.set_minimal_size(x_size, y_size) unless @background_fill

      @i_logo.unshift Magick::Image.new(x_size, y_size, @background_fill)
    end
    private :render_background

    # Counts the added image; placing it is up to subclasses.
    def add_image(item)
      @size += 1
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative 'gluing_canvas'
2
+
3
module SequenceLogo
  # Canvas that places each added image to the right of the images added before.
  class HorizontalGluingCanvas < GluingCanvas
    alias_method :length, :size

    # Puts +image+ at the current right edge of the canvas.
    def add_image(image)
      super
      left_edge = x_size
      @i_logo.put_image_at(image, left_edge, 0)
    end

    # Total width: sum of widths of all images in the list.
    def x_size
      @i_logo.to_a.inject(0) { |total, img| total + img.columns }
    end

    # Height of the tallest image in the list (0 when the list is empty).
    def y_size
      heights = @i_logo.to_a.map(&:rows)
      heights.max || 0
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'RMagick'
2
+ require_relative '../magick_support'
3
+ require_relative 'horizontal_gluing_canvas'
4
+
5
module SequenceLogo
  # Horizontal canvas for a single logo: positions and letters are taken
  # from a canvas factory and glued left to right.
  class LogoCanvas < HorizontalGluingCanvas
    attr_reader :canvas_factory

    def initialize(canvas_factory)
      super()
      @canvas_factory = canvas_factory
    end

    # Schedules a threshold line. The line is not drawn immediately because
    # the full length of the canvas is not known yet, so an instantly
    # rendered line would be too short.
    def draw_threshold_line(threshold_level)
      deferred = lambda { render_threshold_line(threshold_level) }
      @rendering_callbacks.push(deferred)
    end

    # Draws a dashed silver horizontal line across the canvas at the given
    # relative level (measured from the bottom).
    def render_threshold_line(threshold_level)
      y_coord = y_size - threshold_level * y_size
      pen = Magick::Draw.new
      pen.fill('transparent')

      pen.stroke_width(y_size / 200.0)
      pen.stroke_dasharray(7,7)

      pen.stroke('silver')
      pen.line(0, y_coord, x_size, y_coord)
      pen.draw(@i_logo)
    end

    # Appends a full-size image of a single letter.
    def add_letter(letter)
      letter_img = canvas_factory.letter_image(letter)
      add_image(letter_img)
    end

    # Appends a logo position built from letters with relative heights.
    def add_position_ordered(ordered_letter_heights)
      position_img = canvas_factory.logo_for_ordered_letters(ordered_letter_heights)
      add_image(position_img)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require_relative 'gluing_canvas'
2
+
3
module SequenceLogo
  # Canvas that places each added image below the images added before.
  class VerticalGluingCanvas < GluingCanvas
    # Puts +image+ at the current bottom edge of the canvas.
    def add_image(image)
      super
      top_edge = y_size
      @i_logo.put_image_at(image, 0, top_edge)
    end

    # Width of the widest image in the list (0 when the list is empty).
    def x_size
      widths = @i_logo.to_a.map(&:columns)
      widths.max || 0
    end

    # Total height: sum of heights of all images in the list.
    def y_size
      @i_logo.to_a.inject(0) { |total, img| total + img.rows }
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  require 'fileutils'
2
2
  require 'optparse'
3
+ require_relative 'support'
4
+
3
5
  module SequenceLogo
4
6
  class CLI
5
7
  attr_reader :options
@@ -34,4 +36,4 @@ module SequenceLogo
34
36
  end
35
37
  end
36
38
  end
37
- end
39
+ end
@@ -0,0 +1,4 @@
1
+ require_relative 'data_models/sequence'
2
+ require_relative 'data_models/sequence_with_snp'
3
+ require_relative 'data_models/ppm_logo'
4
+ require_relative 'data_models/predefined_logo'
@@ -0,0 +1,63 @@
1
+ require_relative '../canvases'
2
+
3
module SequenceLogo
  # wrapper around PPM to make it possible to configure rendering in a flexible way
  class PPMLogo
    attr_reader :ppm, :words_count, :icd_mode, :enable_threshold_lines

    # ppm     - the matrix to draw
    # options - :words_count, :icd_mode, :enable_threshold_lines
    #
    # NOTE: when :words_count is given it is written into the passed ppm.
    # When the ppm ends up without words count, the mode falls back to :weblogo.
    def initialize(ppm, options = {})
      @ppm = ppm
      @words_count = options[:words_count]
      @icd_mode = options[:icd_mode]
      @enable_threshold_lines = options[:enable_threshold_lines]

      @ppm.words_count = @words_count if @words_count
      unless ppm.words_count
        report "words count for PPM is undefined, assuming weblogo mode"
        @icd_mode = :weblogo
      end
    end

    def length
      ppm.length
    end

    def name
      ppm.name
    end

    # Logo of the reverse-complemented matrix with the same rendering options.
    def revcomp
      PPMLogo.new(ppm.revcomp, words_count: words_count, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
    end

    def logo_matrix
      ppm.get_logo(icd_mode)
    end

    # Draws the logo position by position. In :discrete mode the background is
    # plain white and threshold lines are drawn when enabled; in other modes
    # a white/bisque hatch fill is used and no threshold lines are drawn.
    def render(canvas_factory)
      canvas = LogoCanvas.new(canvas_factory)
      if icd_mode == :discrete
        canvas.background(Magick::HatchFill.new('white', 'white'))
        if enable_threshold_lines
          canvas.draw_threshold_line(ppm.get_line(ppm.icd2of4))
          canvas.draw_threshold_line(ppm.get_line(ppm.icdThc))
          canvas.draw_threshold_line(ppm.get_line(ppm.icdTlc))
        end
      else
        canvas.background(Magick::HatchFill.new('white', 'bisque'))
      end

      logo_matrix.each do |position|
        canvas.add_position_ordered( position_sorted_by_height(position) )
      end
      canvas.image
    end

    # Pairs each height with its letter index and orders the pairs from the highest to the lowest:
    # [3,1,1,2] ==> [[3, 0],[2, 3],[1, 2],[1, 1]] (derived from [[3, 'A'],[2,'T'],[1,'G'],[1,'C']])
    # Equal heights are ordered by descending letter index, because the ascending sort is reversed as a whole.
    def position_sorted_by_height(position)
      # sorting by [count, letter_index] gives a deterministic order of same-height nucleotides
      position.each_with_index.sort_by{|count, letter_index| [count, letter_index] }.reverse
    end
    private :position_sorted_by_height
  end
end
@@ -0,0 +1,29 @@
1
module SequenceLogo
  # Logo backed by ready-made images for the direct and the reverse strand.
  class PredefinedLogo
    attr_reader :direct_image, :reverse_image

    # options - :direct_image, :reverse_image, :name, :length
    def initialize(options = {})
      @direct_image, @reverse_image, @name, @length =
        options.values_at(:direct_image, :reverse_image, :name, :length)
    end

    # Length given at construction; raises when it was not given.
    def length
      @length || raise('Length not defined')
    end

    # Name given at construction; raises when it was not given.
    def name
      @name || raise('Name not defined')
    end

    # Same logo with the direct and the reverse images swapped.
    def revcomp
      PredefinedLogo.new(name: @name,
                         length: @length,
                         direct_image: @reverse_image,
                         reverse_image: @direct_image)
    end

    # The predefined image is returned as is; the canvas factory is not used.
    def render(canvas_factory)
      direct_image
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require_relative '../canvases'
2
+
3
module SequenceLogo
  # Plain nucleotide sequence (letters A, C, G, T in any case) drawn letter by letter.
  class Sequence
    attr_reader :sequence, :name

    # sequence - string of nucleotides
    # options  - :name (defaults to the sequence itself)
    #
    # Raises RuntimeError ('Wrong sequence') for an invalid sequence.
    def initialize(sequence, options = {})
      raise 'Wrong sequence' unless Sequence.valid_sequence?(sequence)
      @sequence = sequence
      @name = options[:name] || sequence
    end

    def length
      sequence.length
    end

    # Reverse-complemented sequence; the name is kept.
    def revcomp
      Sequence.new(Sequence.revcomp(sequence), name: name)
    end

    # Draws every letter at full height on a white background.
    def render(canvas_factory)
      canvas = LogoCanvas.new(canvas_factory)
      canvas.background(Magick::HatchFill.new('white', 'white'))
      sequence.each_char do |letter|
        canvas.add_letter(letter)
      end
      canvas.image
    end

    # Complements each nucleotide, preserving letter case and order.
    def self.complement(sequence)
      sequence.tr('acgtACGT', 'tgcaTGCA')
    end

    def self.revcomp(sequence)
      complement(sequence).reverse
    end

    # true when +sequence+ is a non-empty string of A, C, G, T (any case), false otherwise.
    # Always returns a strict boolean; nil is reported as invalid instead of
    # failing with NoMethodError.
    def self.valid_sequence?(sequence)
      !!(sequence =~ /\A[acgt]+\z/i)
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require_relative 'sequence'
2
+ require_relative '../canvases'
3
+
4
module SequenceLogo
  # Sequence with a single polymorphic position: left flank, list of allele
  # variants and right flank (written like ATCTC[C/G]CCTAAT).
  class SequenceWithSNP
    attr_reader :left, :allele_variants, :right, :name

    # left, right     - flanking sequences (must pass Sequence.valid_sequence?)
    # allele_variants - non-empty list of single letters A/C/G/T (any case)
    # options         - :name (defaults to "<left>_<variants joined by ->_<right>")
    #
    # Raises RuntimeError with an explanatory message when an argument is invalid.
    def initialize(left, allele_variants, right, options = {})
      raise "Wrong left flank #{left.inspect}" unless Sequence.valid_sequence?(left)
      raise "Wrong right flank #{right.inspect}" unless Sequence.valid_sequence?(right)
      unless !allele_variants.empty? && allele_variants.all?{|letter| %w[A C G T].include?(letter.upcase) }
        raise "Wrong allele variants #{allele_variants.inspect}"
      end
      @left, @allele_variants, @right = left, allele_variants, right
      @name = options[:name] || (left + '_' + allele_variants.join('-') + '_' + right)
    end

    # Parses a string like ATCTC[C/G]CCTAAT.
    # Raises ArgumentError when the bracketed SNP part or the right flank is missing.
    def self.from_string(sequence, options = {})
      left, mid, right = sequence.split(/[\[\]]/)
      unless mid && right
        raise ArgumentError, "Wrong sequence with SNP #{sequence.inspect}, expected format like ATCTC[C/G]CCTAAT"
      end
      SequenceWithSNP.new(left, mid.split('/'), right, options)
    end

    # Both flanks plus one position for the SNP.
    def length
      left.length + 1 + right.length
    end

    # Reverse complement: flanks are swapped and reverse-complemented, each
    # allele is complemented. The name is kept, as in Sequence#revcomp.
    def revcomp
      SequenceWithSNP.new(Sequence.revcomp(right),
                          allele_variants.map{|letter| Sequence.complement(letter) },
                          Sequence.revcomp(left),
                          name: name)
    end

    # Draws the flanks letter by letter and the SNP as one position in which
    # the allele variants share the height equally.
    def render(canvas_factory)
      canvas = LogoCanvas.new(canvas_factory)
      canvas.background(Magick::HatchFill.new('white', 'white'))
      left.each_char{|letter| canvas.add_letter(letter) }
      canvas.add_position_ordered(snp_position_heights)
      right.each_char{|letter| canvas.add_letter(letter) }
      canvas.image
    end

    # [relative height, letter] pairs of the SNP position
    def snp_position_heights
      allele_variants.map{|letter| [1.0 / allele_variants.size, letter] }
    end
    private :snp_position_heights
  end
end
@@ -1,43 +1,64 @@
1
1
  require_relative '../../sequence_logo'
2
2
  require 'fileutils'
3
- require 'cgi'
4
3
  require 'tempfile'
5
4
 
6
- def generate_glued_logo(alignment_infos, options, total_orientation, output_file)
7
- logos = {}
8
- logo_files = []
9
- rightmost_side = alignment_infos.map do |line|
5
+ def load_alignment_infos(alignment_lines)
6
+ alignment_lines.map{|line|
10
7
  filename, shift, orientation, motif_name = line.strip.split("\t")
8
+ motif_name ||= File.basename(filename, File.extname(filename))
11
9
  shift = shift.to_i
12
- shift + get_ppm_from_file(filename).length
13
- end.max
10
+ orientation = orientation.downcase.to_sym
14
11
 
15
- alignment_infos.each do |line|
16
- filename, shift, orientation, motif_name = line.strip.split("\t")
17
- motif_name ||= CGI.unescape(File.basename(filename, File.extname(filename)))
18
12
  ppm = get_ppm_from_file(filename)
19
- shift = shift.to_i
20
- raise 'Unknown orientation' unless %w[direct revcomp].include?(orientation.downcase)
21
- if total_orientation == :revcomp
22
- orientation = (orientation == 'direct') ? 'revcomp' : 'direct'
23
- shift = rightmost_side - shift - ppm.length
24
- end
25
13
  checkerr("bad input file: #{filename}") { ppm == nil }
26
- logo_file = Tempfile.new(filename)
27
- logo_files << logo_file
28
- case orientation
29
- when 'direct'
30
- SequenceLogo.draw_logo(ppm, options).write("PNG:#{logo_file.path}")
31
- when 'revcomp'
32
- SequenceLogo.draw_logo(ppm.revcomp, options).write("PNG:#{logo_file.path}")
33
- else
34
- raise "Unknown orientation #{orientation} for #{motif_name}"
35
- end
36
- logos[logo_file.path] = {shift: shift, length: ppm.length, name: motif_name}
14
+ ppm.name ||= motif_name
15
+
16
+ raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
17
+
18
+ ppm_oriented = (orientation == :direct) ? ppm : ppm.revcomp
19
+ {motif: ppm_oriented, shift: shift}
20
+ }
21
+ end
22
+
23
# Builds an alignment of PPM logos from a list of {motif: , shift: } hashes.
# Rendering options (:icd_mode, :words_count, :threshold_lines) are taken from +options+.
def make_logo_alignment(aligned_motifs, options)
  aligned_motifs.inject(SequenceLogo::Alignment.new) do |alignment, motif_infos|
    ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
                                         icd_mode: options[:icd_mode],
                                         words_count: options[:words_count],
                                         enable_threshold_lines: options[:threshold_lines])
    alignment + SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
  end
end
34
+
35
# Reads input lines either from the file named by the first element of +argv+
# or from stdin (when stdin is not a terminal).
#
# options - :source_not_given_msg and :both_sources_given_msg override the error messages
#
# Raises RuntimeError when both sources are given and ArgumentError when none is.
def readlines_from_file_or_stdin(argv, options = {})
  messages = {
    source_not_given_msg: 'Specify input data',
    both_sources_given_msg: 'Specify either file with data or data itself in stdin, not both'
  }.merge(options)

  file_given = !argv.empty?
  stdin_given = !$stdin.tty?
  raise messages[:both_sources_given_msg] if file_given && stdin_given

  return File.readlines(argv.first) if file_given
  return $stdin.readlines if stdin_given

  raise ArgumentError, messages[:source_not_given_msg]
end
38
49
 
39
- SequenceLogo.glue_files(logos, output_file, options)
40
- logo_files.each(&:close)
50
# Inserts "_direct" between the name and the extension of +output_file+.
def direct_output_filename(output_file)
  suffix = File.extname(output_file)
  stem = File.basename(output_file, suffix)
  File.join(File.dirname(output_file), "#{stem}_direct#{suffix}")
end
56
+
57
# Inserts "_revcomp" between the name and the extension of +output_file+.
def reverse_output_filename(output_file)
  suffix = File.extname(output_file)
  stem = File.basename(output_file, suffix)
  File.join(File.dirname(output_file), "#{stem}_revcomp#{suffix}")
end
42
63
 
43
64
  begin
@@ -57,7 +78,7 @@ begin
57
78
 
58
79
  argv = ARGV
59
80
  total_orientation = :direct
60
- default_options = {x_unit: 30, y_unit: 60, words_count: nil, icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa', logo_shift: 300, text_size_pt: 24}
81
+ default_options = {x_unit: 30, y_unit: 60, words_count: nil, icd_mode: :discrete, threshold_lines: false, scheme: 'nucl_simpa', logo_shift: 300, text_size: 24}
61
82
  cli = SequenceLogo::CLI.new(default_options)
62
83
  cli.instance_eval do
63
84
  parser.banner = doc
@@ -78,25 +99,25 @@ begin
78
99
  output_file = argv.shift
79
100
  raise ArgumentError, 'Specify output file' unless output_file
80
101
 
81
- raise 'You can specify alignment infos either from file or from stdin. Don\'t use both sources simultaneously' if !ARGV.empty? && !$stdin.tty?
82
- if !ARGV.empty?
83
- alignment_infos = File.readlines(ARGV.shift)
84
- elsif !$stdin.tty?
85
- alignment_infos = $stdin.readlines
86
- else
87
- raise ArgumentError, 'Specify alignment infos'
88
- end
102
+ alignment_lines = readlines_from_file_or_stdin(argv, source_not_given_msg: 'Specify alignment infos',
103
+ both_sources_given_msg: 'You can specify alignment infos either from file or from stdin. Don\'t use both sources simultaneously')
104
+ alignment = make_logo_alignment(load_alignment_infos(alignment_lines), options)
89
105
 
90
- if total_orientation == :both
91
- extname = File.extname(output_file)
92
- basename = File.basename(output_file, extname)
93
- dirname = File.dirname(output_file)
94
- generate_glued_logo(alignment_infos, options, :direct, File.join(dirname, "#{basename}_direct.#{extname}"))
95
- generate_glued_logo(alignment_infos, options, :revcomp, File.join(dirname, "#{basename}_revcomp.#{extname}"))
96
- else
97
- generate_glued_logo(alignment_infos, options, total_orientation, output_file)
106
+ scheme_dir = File.join(SequenceLogo::AssetsPath, options[:scheme])
107
+ letter_images = SequenceLogo::CanvasFactory.letter_images(scheme_dir)
108
+ canvas_factory = SequenceLogo::CanvasFactory.new(letter_images, x_unit: options[:x_unit], y_unit: options[:y_unit],
109
+ text_size: options[:text_size], logo_shift: options[:logo_shift])
110
+
111
+ case total_orientation
112
+ when :direct
113
+ alignment.render(canvas_factory).write('PNG:' + output_file)
114
+ when :revcomp
115
+ alignment.revcomp.render(canvas_factory).write('PNG:' + output_file)
116
+ when :both
117
+ alignment.render(canvas_factory).write('PNG:' + direct_output_filename(output_file))
118
+ alignment.revcomp.render(canvas_factory).write('PNG:' + reverse_output_filename(output_file))
98
119
  end
99
120
 
100
121
  rescue => err
101
122
  $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
102
- end
123
+ end
@@ -1,17 +1,50 @@
1
1
  require_relative '../../sequence_logo'
2
2
  require 'shellwords'
3
3
 
4
# Expands objects into the requested orientation(s) and assigns output files:
#
#   [{renderable: , name: }] --> [{renderable: , filename: }]
#
# orientation is :direct, :revcomp or :both; for :both each object yields two
# entries with "_direct" and "_revcomp" filename suffixes.
# Files are always placed directly into +logo_folder+: only the last path
# component of the name is used, so a name that already carries a folder
# prefix (e.g. one built with File.join(logo_folder, ...)) does not end up in
# a nested, non-existent folder.
#
# Raises ArgumentError for an unknown orientation.
def in_necessary_orientations(objects_to_render, orientation, logo_folder)
  unless [:direct, :revcomp, :both].include?(orientation)
    raise ArgumentError, 'Orientation can be either direct or revcomp or both'
  end

  objects_to_render.flat_map do |infos|
    renderable = infos[:renderable]
    basename = File.basename(infos[:name].to_s)
    case orientation
    when :direct
      [{renderable: renderable, filename: File.join(logo_folder, "#{basename}.png")}]
    when :revcomp
      [{renderable: renderable.revcomp, filename: File.join(logo_folder, "#{basename}.png")}]
    when :both
      [{renderable: renderable, filename: File.join(logo_folder, "#{basename}_direct.png")},
       {renderable: renderable.revcomp, filename: File.join(logo_folder, "#{basename}_revcomp.png")}]
    end
  end
end
18
+
19
# Returns +argv+ itself when stdin is a terminal; otherwise a new list of
# +argv+ followed by the shell-split words read from stdin.
# +argv+ is never modified.
def arglist_augmented_with_stdin(argv)
  return argv if $stdin.tty?

  argv + $stdin.read.shellsplit
end
24
+
4
25
  begin
26
+ include SequenceLogo
27
+
5
28
  doc = <<-EOS
6
- sequence_logo is a tool for drawing motif logos. It is able to process PCM files either as a position matrix (*.pat or *.pcm), or in FASTA format (file extensions: .mfa, .fasta, .plain), or in SMall BiSMark format (.xml), or in IUPAC format (any other extension).
29
+ sequence_logo is a tool for drawing motif and sequence logos
30
+ It is able to process
31
+ - PCM / PPM format i.e. position count/frequency matrix (*.pat or *.pcm) - preferable
32
+ - FASTA format (file extensions: .mfa, .fasta, .plain)
33
+ - SMall BiSMark format (.xml)
34
+ - IUPAC format (any other extension)
7
35
  Usage:
8
- sequence_logo [options] <pcm/ppm file>...
36
+ sequence_logo [options] <motif file>...
9
37
  or
10
38
  ls pcm_folder/*.pcm | sequence_logo [options]
39
+ or
40
+ sequence_logo --sequence <sequence>...
41
+ or
42
+ sequence_logo --snp-sequence <sequence with SNP>...
43
+
11
44
  EOS
12
45
 
13
46
  argv = ARGV
14
- default_options = {x_unit: 30, y_unit: 60, words_count: nil, orientation: :both, logo_folder: '.', icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa'}
47
+ default_options = {x_unit: 30, y_unit: 60, words_count: nil, orientation: :direct, logo_folder: '.', icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa'}
15
48
  cli = SequenceLogo::CLI.new(default_options)
16
49
  cli.instance_eval do
17
50
  parser.banner = doc
@@ -23,30 +56,61 @@ begin
23
56
  raise ArgumentError, 'Orientation can be either direct or revcomp or both' unless [:direct, :revcomp, :both].include?(v)
24
57
  options[:orientation] = v
25
58
  end
59
+
60
+ parser.on('--snp-sequence', 'Specify sequences with SNP (like ATCTC[C/G]CCTAAT) instead of motif filenames') do
61
+ options[:sequence_w_snp] = true
62
+ end
63
+ parser.on('--sequence', 'Specify sequence (like ATCTCGCCTAAT) instead of motif filenames') do
64
+ options[:sequence] = true
65
+ end
26
66
  end
27
67
  options = cli.parse_options!(argv)
28
68
 
29
69
  logo_folder = options[:logo_folder]
30
70
  Dir.mkdir(logo_folder) unless Dir.exist?(logo_folder)
31
71
 
32
- filenames = argv
33
- filenames += $stdin.read.shellsplit unless $stdin.tty?
34
- raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
72
+ scheme_dir = File.join(SequenceLogo::AssetsPath, options[:scheme])
73
+ letter_images = SequenceLogo::CanvasFactory.letter_images(scheme_dir)
74
+ canvas_factory = SequenceLogo::CanvasFactory.new(letter_images, x_unit: options[:x_unit], y_unit: options[:y_unit])
75
+
76
+ raise "Specify either sequence or sequence with SNP or none of them, but not both" if options[:sequence] && options[:sequence_w_snp]
35
77
 
36
- filenames.each do |filename|
37
- ppm = get_ppm_from_file(filename)
38
- checkerr("bad input file: #{filename}") { ppm == nil }
78
+ objects_to_render = []
79
+ if options[:sequence]
80
+ sequences = arglist_augmented_with_stdin(argv)
81
+ raise ArgumentError, 'Specify at least one sequence' if sequences.empty?
82
+
83
+ sequences.each do |sequence|
84
+ objects_to_render << {renderable: SequenceLogo::Sequence.new(sequence),
85
+ name: File.join(logo_folder, sequence)}
86
+ end
87
+ elsif options[:sequence_w_snp]
88
+ sequences = arglist_augmented_with_stdin(argv)
89
+ raise ArgumentError, 'Specify at least one sequence' if sequences.empty?
39
90
 
40
- filename_wo_ext = File.basename(filename, File.extname(filename))
41
- if [:direct, :both].include?(options[:orientation])
42
- direct_output = File.join(logo_folder, "#{filename_wo_ext}_direct.png")
43
- SequenceLogo.draw_logo(ppm, options).write(direct_output)
91
+ sequences.each do |sequence_w_snp|
92
+ objects_to_render << {renderable: SequenceLogo::SequenceWithSNP.from_string(sequence_w_snp),
93
+ name: File.join(logo_folder, sequence_w_snp.gsub(/[\[\]\/]/, '_'))}
44
94
  end
45
- if [:revcomp, :both].include?(options[:orientation])
46
- revcomp_output = File.join(logo_folder, "#{filename_wo_ext}_revcomp.png")
47
- SequenceLogo.draw_logo(ppm.revcomp, options).write(revcomp_output)
95
+ else
96
+ filenames = arglist_augmented_with_stdin(argv)
97
+ raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
98
+
99
+ filenames.each do |filename|
100
+ ppm = get_ppm_from_file(filename)
101
+ checkerr("bad input file: #{filename}") { ppm == nil }
102
+
103
+ logo = SequenceLogo::PPMLogo.new( ppm,
104
+ icd_mode: options[:icd_mode],
105
+ words_count: options[:words_count],
106
+ enable_threshold_lines: options[:threshold_lines])
107
+ objects_to_render << {renderable: logo, name: File.join(logo_folder, File.basename_wo_extname(filename))}
48
108
  end
49
109
  end
110
+
111
+ in_necessary_orientations(objects_to_render, options[:orientation], logo_folder).each do |infos|
112
+ infos[:renderable].render(canvas_factory).write("PNG:#{infos[:filename]}")
113
+ end
50
114
  rescue => err
51
115
  $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
52
- end
116
+ end
@@ -0,0 +1,14 @@
1
+ require 'RMagick'
2
+
3
# Helper methods added to RMagick's ImageList for the gluing canvases.
class Magick::ImageList
  # Appends +image+ to the list and assigns a page rectangle built from
  # (0, 0, x, y) to the current image.
  # NOTE(review): presumably this sets the offset at which the layer is placed
  # when the list is flattened - confirm against RMagick's Rectangle/page docs.
  def put_image_at(image, x, y)
    self << image
    cur_image.page = Magick::Rectangle.new(0, 0, x, y)
  end

  # add transparent layer so that full canvas size can't be less than given size
  # The layer is inserted at the beginning of the list.
  def set_minimal_size(x_size, y_size)
    empty_image = Magick::Image.new(x_size, y_size){ self.background_color = 'transparent'}
    self.unshift(empty_image)
  end
end
@@ -1,114 +1,5 @@
1
+ require_relative 'canvases'
2
+ require_relative 'canvas_factory'
3
+ require_relative 'alignment'
4
+ require_relative 'data_models'
1
5
  require_relative 'ytilib'
2
- require 'RMagick'
3
-
4
- module SequenceLogo
5
- def self.draw_threshold_lines(i_logo, ppm)
6
- x_size = i_logo.columns
7
- y_size = i_logo.rows
8
-
9
- line2of4 = y_size - ppm.get_line(ppm.icd2of4) * y_size
10
- lineThc = y_size - ppm.get_line(ppm.icdThc) * y_size
11
- lineTlc = y_size - ppm.get_line(ppm.icdTlc) * y_size
12
-
13
- dr = Magick::Draw.new
14
- dr.fill('transparent')
15
-
16
- dr.stroke_width(y_size / 200.0)
17
- dr.stroke_dasharray(7,7)
18
-
19
- dr.stroke('silver')
20
- dr.line(0, line2of4, x_size, line2of4)
21
- dr.line(0, lineThc, x_size, lineThc)
22
- dr.line(0, lineTlc, x_size, lineTlc)
23
-
24
- dr.draw(i_logo)
25
- end
26
-
27
- def self.create_canvas(ppm, options)
28
- x_size = options[:x_unit] * ppm.length
29
- y_size = options[:y_unit]
30
-
31
- i_logo = Magick::ImageList.new
32
- if options[:icd_mode] == :discrete
33
- i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'white'))
34
- draw_threshold_lines(i_logo, ppm) if options[:threshold_lines]
35
- else
36
- i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'bisque'))
37
- end
38
-
39
- i_logo
40
- end
41
-
42
- def self.letter_images(scheme_dir)
43
- if File.exist?(File.join(scheme_dir,'a.png'))
44
- extension = 'png'
45
- elsif File.exist?(File.join(scheme_dir,'a.gif'))
46
- extension = 'gif'
47
- else
48
- raise "Scheme not exists in folder #{scheme_dir}"
49
- end
50
-
51
- letter_files = %w[a c g t].collect{|letter| File.join(scheme_dir, "#{letter}.#{extension}") }
52
- Magick::ImageList.new(*letter_files)
53
- end
54
-
55
- def self.draw_letters_on_canvas(i_logo, i_letters, ppm, options)
56
- y_unit = options[:y_unit]
57
- x_unit = options[:x_unit]
58
- matrix = ppm.get_logo(options[:icd_mode])
59
- matrix['A'].each_index { |i|
60
- y_pos = 0
61
- sorted_letters = ['A', 'C', 'G', 'T'].collect { |letter| {:score => matrix[letter][i], :letter => letter} }.sort_by { |pair| pair[:score] }.collect { |pair| pair[:letter] }.reverse
62
- sorted_letters.each { |letter|
63
- next if y_unit * matrix[letter][i] <= 1
64
- letter_index = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}[letter]
65
- y_block = (y_unit * matrix[letter][i]).round
66
- i_logo << i_letters[letter_index].dup.resize(x_unit, y_block)
67
- y_pos += y_block
68
- i_logo.cur_image.page = Magick::Rectangle.new(0, 0, i * x_unit, y_unit - y_pos )
69
- }
70
- }
71
- end
72
-
73
- def self.draw_logo(ppm, options = {})
74
- ppm.words_count = options[:words_count] if options[:words_count]
75
- unless ppm.words_count
76
- report "words count for PPM is undefined, assuming weblogo mode"
77
- options[:icd_mode] = :weblogo
78
- end
79
- i_logo = create_canvas(ppm, options)
80
- scheme_dir = File.join(AssetsPath, options[:scheme])
81
- draw_letters_on_canvas(i_logo, letter_images(scheme_dir), ppm, options)
82
- i_logo = i_logo.flatten_images
83
- end
84
-
85
- # logos = { filename => {shift: ..., length: ..., name: ...} }
86
- def self.glue_files(logos, output_file, options)
87
- logo_shift = options[:logo_shift] || 300
88
- x_unit = options[:x_unit] || 30
89
- y_unit = options[:y_unit] || 60
90
- text_size = options[:text_size] || 24
91
-
92
- leftmost_shift = logos.map{|file,infos| infos[:shift] }.min
93
- logos.each{|file, infos| infos[:shift] -= leftmost_shift}
94
- full_alignment_size = logos.map{|file,infos| infos[:length] + infos[:shift] }.max
95
-
96
- x_size = logo_shift + full_alignment_size * x_unit
97
- y_size = logos.size * y_unit
98
- command_string = "convert -size #{ x_size }x#{ y_size } -pointsize #{text_size} xc:white "
99
- logos.each_with_index do |(logo_filename,infos), idx|
100
- logo_x_start = logo_shift + infos[:shift] * x_unit
101
- logo_y_start = y_unit * idx
102
- command_string << "\"#{ logo_filename }\" -geometry +#{ logo_x_start }+#{ logo_y_start } -composite "
103
- end
104
-
105
- command_draw_names = ""
106
- logos.each_with_index do |(logo_filename,infos), idx|
107
- text_x_start = 10
108
- text_y_start = y_unit * (idx + 0.5)
109
- command_draw_names << "-draw \"text #{ text_x_start },#{ text_y_start } '#{infos[:name]}'\" "
110
- end
111
-
112
- system(command_string + command_draw_names + "\"#{output_file}\"")
113
- end
114
- end
@@ -0,0 +1,5 @@
1
+ class File
2
+ def self.basename_wo_extname(filename)
3
+ File.basename(filename, File.extname(filename))
4
+ end
5
+ end
@@ -1,3 +1,3 @@
1
1
  module SequenceLogo
2
- VERSION = "1.0.6"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -7,4 +7,4 @@ require_relative 'ytilib/randoom'
7
7
  require_relative 'ytilib/bismark'
8
8
  require_relative 'ytilib/hack1'
9
9
  require_relative 'ytilib/infocod'
10
- require_relative 'ytilib/ppm_support'
10
+ require_relative 'ytilib/ppm_support'
@@ -5,6 +5,18 @@ module Ytilib
5
5
  attr_accessor :words_count
6
6
 
7
7
  alias length size
8
+
9
+ def each_position_index(&block)
10
+ @matrix['A'].each_index(&block)
11
+ end
12
+
13
+ def each_position(&block)
14
+ return enum_for(:each_position) unless block_given?
15
+ @matrix['A'].each_index do |i|
16
+ position = ['A', 'C', 'G', 'T'].map{|letter| @matrix[letter][i] }
17
+ yield position
18
+ end
19
+ end
8
20
 
9
21
  def score_mean(bckgr = Randoom::DEF_PROBS)
10
22
  (0...@size).inject(0.0) { |mean, i| mean += ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i] * bckgr[l] } }
@@ -324,12 +336,12 @@ module Ytilib
324
336
  attributes = {"length" => @size}
325
337
  attributes["words-count"] = @words_count if @words_count && @words_count > 0
326
338
  pe = b.add_element( pwm ? "PWM" : "PCM", attributes )
327
- (0...@matrix['A'].size).each { |i|
339
+ each_position_index do |i|
328
340
  pm_c = pe.add_element("pm-column", {"position" => i+1})
329
341
  ['A', 'C', 'G', 'T'].each { |l|
330
342
  pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
331
343
  }
332
- }
344
+ end
333
345
  end
334
346
 
335
347
  def PM.from_bismark(b, iupacomp = false)
@@ -43,40 +43,25 @@ class PPM
43
43
 
44
44
 
45
45
  def get_logo_weblogo
46
- rseq = []
47
- @matrix['A'].each_index { |i|
48
- rseq << 2 + ['A','C','G','T'].inject(0) { |sum, l|
49
- pn = @matrix[l][i]
50
- sum += (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
51
- }
46
+ rseq = each_position.map {|position|
47
+ position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
52
48
  }
53
-
54
- mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
55
- @matrix['A'].each_index { |i|
56
- ['A','C','G','T'].each { |l|
57
- mat[l][i]= @matrix[l][i] * rseq[i] / 2 # so we can handle a '2 bit' scale here
58
- }
49
+
50
+ each_position.with_index.map {|position, ind|
51
+ position.map{|el| el * rseq[ind] / 2 }
59
52
  }
60
-
61
- mat
62
53
  end
63
54
 
64
55
  def get_logo_discrete
65
56
  checkerr("words count is undefined") { !words_count }
66
57
 
67
- rseq = []
68
- @matrix['A'].each_index { |i|
69
- rseq << (icd4of4 == 0 ? 1.0 : ( (infocod(i) - icd4of4) / icd4of4 ).abs)
58
+ rseq = each_position_index.map {|i|
59
+ (icd4of4 == 0) ? 1.0 : get_line(infocod(i))
70
60
  }
71
-
72
- mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
73
- @matrix['A'].each_index { |i|
74
- ['A','C','G','T'].each { |l|
75
- mat[l][i] = @matrix[l][i] * rseq[i]
76
- }
61
+
62
+ each_position.with_index.map {|position, ind|
63
+ position.map{|el| el * rseq[ind] }
77
64
  }
78
-
79
- mat
80
65
  end
81
66
 
82
67
  def revcomp
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequence_logo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-21 00:00:00.000000000 Z
11
+ date: 2014-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmagick
@@ -44,14 +44,32 @@ files:
44
44
  - bin/glue_logos
45
45
  - bin/sequence_logo
46
46
  - lib/sequence_logo.rb
47
+ - lib/sequence_logo/alignment.rb
47
48
  - lib/sequence_logo/assets/nucl_simpa/a.png
48
49
  - lib/sequence_logo/assets/nucl_simpa/c.png
49
50
  - lib/sequence_logo/assets/nucl_simpa/g.png
50
51
  - lib/sequence_logo/assets/nucl_simpa/t.png
52
+ - lib/sequence_logo/assets/nucl_simpa_bw/a.png
53
+ - lib/sequence_logo/assets/nucl_simpa_bw/c.png
54
+ - lib/sequence_logo/assets/nucl_simpa_bw/g.png
55
+ - lib/sequence_logo/assets/nucl_simpa_bw/t.png
56
+ - lib/sequence_logo/canvas_factory.rb
57
+ - lib/sequence_logo/canvases.rb
58
+ - lib/sequence_logo/canvases/gluing_canvas.rb
59
+ - lib/sequence_logo/canvases/horizontal_gluing_canvas.rb
60
+ - lib/sequence_logo/canvases/logo_canvas.rb
61
+ - lib/sequence_logo/canvases/vertical_gluing_canvas.rb
51
62
  - lib/sequence_logo/cli.rb
63
+ - lib/sequence_logo/data_models.rb
64
+ - lib/sequence_logo/data_models/ppm_logo.rb
65
+ - lib/sequence_logo/data_models/predefined_logo.rb
66
+ - lib/sequence_logo/data_models/sequence.rb
67
+ - lib/sequence_logo/data_models/sequence_with_snp.rb
52
68
  - lib/sequence_logo/exec/glue_logos.rb
53
69
  - lib/sequence_logo/exec/sequence_logo.rb
70
+ - lib/sequence_logo/magick_support.rb
54
71
  - lib/sequence_logo/pmflogo_lib.rb
72
+ - lib/sequence_logo/support.rb
55
73
  - lib/sequence_logo/version.rb
56
74
  - lib/sequence_logo/ytilib.rb
57
75
  - lib/sequence_logo/ytilib/addon.rb
@@ -90,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
108
  version: '0'
91
109
  requirements: []
92
110
  rubyforge_project:
93
- rubygems_version: 2.2.1
111
+ rubygems_version: 2.2.2
94
112
  signing_key:
95
113
  specification_version: 4
96
114
  summary: Tool for drawing sequence logos of motifs