sequence_logo 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 885b0f25932abe6a453fcbfb48eaae9e5227383a
4
- data.tar.gz: fc7ca0ff71c311e8d7301d086ed3b4d153f0493f
3
+ metadata.gz: 4ed1b2f635b68cfa6d69d47f1e122c81f917e286
4
+ data.tar.gz: 493c1d3b807804a5f9ee670df8d7fbc5f6fe883f
5
5
  SHA512:
6
- metadata.gz: c47be1cb8060cd2ed305d4d21862b86db24d6b24b7b5989fbe86a4fb245c2b8db6a07595a2719c922d0a32bdd10d59f8e916c87c23610911c1a18ca547148ce8
7
- data.tar.gz: cfc2df9b0029a19f17070a9937e6a5c61ccebee6ff0c1e65dac5da573c8e5e23de23b23ae240f00dcfc06b525f8e748c35ebfdd006765e2cd608e820450dc72f
6
+ metadata.gz: b6f56777b12c9dd26c0f3523f3762246baa42a070b045126fabb1b3250b9bba5baa4ff8685a80d06c2b8fabdd095a0c5512a3c0c7096016dd65f1983b24f12e8
7
+ data.tar.gz: d8bed3b3cab1287681a440b020166f8a5012f02ffb61bfb87ddb7437d23b07298c85cd68fc8a0713020ec8470362d7ea0ae4ad2738140553dfc595f7e5d00752
data/README.md CHANGED
@@ -38,16 +38,17 @@ SequenceLogo consists of two tools:
38
38
 
39
39
  * Tool **glue_logos** generates a single image of aligned motifs.
40
40
 
41
- `glue_logos <output file> <file with alignment infos>`
42
-
41
+ ```glue_logos <output file> <file with alignment infos>```
42
+
43
43
  or
44
44
 
45
- `<alignment infos> | glue_logos <output file>`
45
+ <alignment infos> | glue_logos <output file>
46
+
47
+ Input data comes either from a file with alignments or from stdin. *glue_logos* is designed to work with the macroape *align_motifs* tool, and its input format is the same as the output format of the *align_motifs* tool:
46
48
 
47
- Input data comes either from file with alignments or from stdin. *glue_logos* is designated to work fine with macroape *align_motifs* tool and has input format the same as output format of *align_motifs* tool:
48
- pcm_file_1 shift_1 orientation_1
49
- pcm_file_2 shift_2 orientation_2
50
- pcm_file_3 shift_3 orientation_3
49
+ pcm_file_1 shift_1 orientation_1 [motif_name_1]
50
+ pcm_file_2 shift_2 orientation_2 [motif_name_2]
51
+ pcm_file_3 shift_3 orientation_3 [motif_name_3]
51
52
 
52
53
  So it's simple to run
53
54
 
@@ -0,0 +1,70 @@
1
+ require_relative 'canvases'
2
+
3
+ module SequenceLogo
4
+ class Alignment
5
+ # object to be aligned should respond to #name, #render, #revcomp
6
+ class Item
7
+ attr_reader :object, :shift
8
+
9
+ def initialize(object, shift)
10
+ @object, @shift = object, shift
11
+ end
12
+
13
+ def length
14
+ @object.length
15
+ end
16
+
17
+ def name
18
+ @object.name
19
+ end
20
+
21
+ def render(canvas_factory)
22
+ object_image = object.render(canvas_factory)
23
+ shifted_image = canvas_factory.shifted_logo(object_image, shift)
24
+ canvas_factory.logo_with_name(shifted_image, name)
25
+ end
26
+ end
27
+
28
+ ####################
29
+
30
+ def initialize(items = [])
31
+ @alignable_items = items
32
+ end
33
+
34
+ def +(item)
35
+ Alignment.new(@alignable_items + [item])
36
+ end
37
+
38
+ def revcomp
39
+ items_reversed = @alignable_items.map{|item|
40
+ shift_reversed = rightmost_position - item.shift - item.length
41
+ Item.new(item.object.revcomp, shift_reversed)
42
+ }
43
+ Alignment.new(items_reversed)
44
+ end
45
+
46
+ def render(canvas_factory)
47
+ canvas = VerticalGluingCanvas.new
48
+ items_normalized.each do |item|
49
+ canvas.add_image item.render(canvas_factory)
50
+ end
51
+ canvas.background(Magick::HatchFill.new('white', 'white'))
52
+ canvas.image
53
+ end
54
+
55
+ private
56
+
57
+ # return list of items shifted altogether such that minimal shift is zero
58
+ def items_normalized
59
+ @alignable_items.map{|item| Item.new(item.object, item.shift - leftmost_shift) }
60
+ end
61
+
62
+ def leftmost_shift
63
+ @alignable_items.map(&:shift).min
64
+ end
65
+
66
+ def rightmost_position
67
+ @alignable_items.map{|item| item.shift + item.length }.max
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,96 @@
1
+ require_relative 'magick_support'
2
+ require_relative 'canvases'
3
+
4
+ module SequenceLogo
5
+ class CanvasFactory
6
+ attr_reader :x_unit, :y_unit, :text_size, :logo_shift
7
+ attr_reader :letter_images
8
+
9
+ def initialize(letter_images, options = {})
10
+ @letter_images = letter_images # .map{|letter_image| letter_image.dup.resize(x_size, y_size) }
11
+ @logo_shift = options[:logo_shift] || 300
12
+ @x_unit = options[:x_unit] || 30
13
+ @y_unit = options[:y_unit] || 60
14
+ @text_size = options[:text_size] || 24
15
+ end
16
+
17
+ def text_image(text, img_height = y_unit)
18
+ text_img = Magick::Image.new(logo_shift, img_height){ self.background_color = 'transparent' }
19
+ annotation = Magick::Draw.new
20
+ annotation.pointsize(text_size)
21
+ annotation.text(10, img_height / 2, text)
22
+ annotation.draw(text_img)
23
+ text_img
24
+ end
25
+
26
+ def shifted_logo(image, shift)
27
+ canvas = HorizontalGluingCanvas.new
28
+ canvas.add_image Magick::Image.new(shift * x_unit, image.rows){ self.background_color = 'transparent' }
29
+ canvas.add_image image
30
+ canvas.image
31
+ end
32
+
33
+ def logo_with_name(image, name)
34
+ canvas = HorizontalGluingCanvas.new
35
+ canvas.add_image text_image(name, image.rows)
36
+ canvas.add_image image
37
+ canvas.image
38
+ end
39
+
40
+ def logo_canvas
41
+ LogoCanvas.new(letter_images, x_unit: x_unit, y_unit: y_unit)
42
+ end
43
+
44
+ # Takes an enumerable with relative (0 to 1) heights of letters and draws them scaled appropriately
45
+ def logo_for_ordered_letters(letters_with_heights)
46
+ logo_for_ordered_letters_nonscaling(rescale_letters(letters_with_heights))
47
+ end
48
+
49
+ # Takes an enumerable with height=>letter pairs draws a logo position with letters in order of enumeration
50
+ # It's a basic logo-block.
51
+ def logo_for_ordered_letters_nonscaling(letters_with_heights)
52
+ y_pos = 0
53
+ position_logo = Magick::ImageList.new
54
+ position_logo.set_minimal_size(x_unit, y_unit)
55
+ letters_with_heights.each do |height, letter|
56
+ y_pos += height
57
+ position_logo.put_image_at(letter_image(letter, x_unit, height), 0, y_unit - y_pos)
58
+ end
59
+ position_logo.flatten_images
60
+ end
61
+
62
+ def rescale_letters(letters_with_heights)
63
+ letters_with_heights
64
+ .reject{|part_of_height, letter| y_unit * part_of_height <= 1 }
65
+ .map{|part_of_height, letter| [(y_unit * part_of_height), letter] }
66
+ end
67
+ private :logo_for_ordered_letters_nonscaling, :rescale_letters
68
+
69
+ def letter_image(letter, x_size = x_unit, y_size = y_unit)
70
+ case letter
71
+ when Numeric
72
+ index = letter
73
+ else
74
+ index = letter_index(letter)
75
+ end
76
+ letter_images[index].dup.resize(x_size, y_size)
77
+ end
78
+
79
+ def letter_index(letter)
80
+ {'A' => 0 ,'C' => 1,'G' => 2 ,'T' => 3}[letter.to_s.upcase]
81
+ end
82
+
83
+ def self.letter_images(scheme_dir)
84
+ if File.exist?(File.join(scheme_dir,'a.png'))
85
+ extension = 'png'
86
+ elsif File.exist?(File.join(scheme_dir,'a.gif'))
87
+ extension = 'gif'
88
+ else
89
+ raise "Scheme not exists in folder #{scheme_dir}"
90
+ end
91
+
92
+ letter_files = %w[a c g t].collect{|letter| File.join(scheme_dir, "#{letter}.#{extension}") }
93
+ Magick::ImageList.new(*letter_files)
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,4 @@
1
+ require_relative 'canvases/gluing_canvas'
2
+ require_relative 'canvases/vertical_gluing_canvas'
3
+ require_relative 'canvases/horizontal_gluing_canvas'
4
+ require_relative 'canvases/logo_canvas'
@@ -0,0 +1,35 @@
1
+ require_relative '../magick_support'
2
+
3
+ module SequenceLogo
4
+ class GluingCanvas
5
+ attr_reader :i_logo, :size
6
+ def initialize
7
+ @i_logo = Magick::ImageList.new
8
+ @size = 0
9
+ @rendering_callbacks = []
10
+ @rendering_callbacks << method(:render_background)
11
+ end
12
+
13
+ def image
14
+ @rendering_callbacks.each(&:call)
15
+ @i_logo.flatten_images
16
+ end
17
+
18
+ def background(fill)
19
+ @background_fill = fill
20
+ end
21
+
22
+ def render_background
23
+ if @background_fill
24
+ @i_logo.unshift Magick::Image.new(x_size, y_size, @background_fill)
25
+ else
26
+ @i_logo.set_minimal_size(x_size, y_size)
27
+ end
28
+ end
29
+ private :render_background
30
+
31
+ def add_image(item)
32
+ @size += 1
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,20 @@
1
+ require_relative 'gluing_canvas'
2
+
3
+ module SequenceLogo
4
+ class HorizontalGluingCanvas < GluingCanvas
5
+ alias_method :length, :size
6
+
7
+ def add_image(image)
8
+ super
9
+ @i_logo.put_image_at(image, x_size, 0)
10
+ end
11
+
12
+ def x_size
13
+ @i_logo.to_a.map(&:columns).inject(0, :+)
14
+ end
15
+
16
+ def y_size
17
+ @i_logo.to_a.map(&:rows).max || 0
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,40 @@
1
+ require 'RMagick'
2
+ require_relative '../magick_support'
3
+ require_relative 'horizontal_gluing_canvas'
4
+
5
+ module SequenceLogo
6
+ class LogoCanvas < HorizontalGluingCanvas
7
+ attr_reader :canvas_factory
8
+ def initialize(canvas_factory)
9
+ super()
10
+ @canvas_factory = canvas_factory
11
+ end
12
+
13
+ def draw_threshold_line(threshold_level)
14
+ # stores threshold levels but doesn't render them because full length of canvas is not known yet,
15
+ # so instantly rendered line would be too short
16
+ @rendering_callbacks.push ->{ render_threshold_line(threshold_level) }
17
+ end
18
+
19
+ def render_threshold_line(threshold_level)
20
+ y_coord = y_size - threshold_level * y_size
21
+ dr = Magick::Draw.new
22
+ dr.fill('transparent')
23
+
24
+ dr.stroke_width(y_size / 200.0)
25
+ dr.stroke_dasharray(7,7)
26
+
27
+ dr.stroke('silver')
28
+ dr.line(0, y_coord, x_size, y_coord)
29
+ dr.draw(@i_logo)
30
+ end
31
+
32
+ def add_letter(letter)
33
+ add_image( canvas_factory.letter_image(letter) )
34
+ end
35
+
36
+ def add_position_ordered(ordered_letter_heights)
37
+ add_image( canvas_factory.logo_for_ordered_letters(ordered_letter_heights) )
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,18 @@
1
+ require_relative 'gluing_canvas'
2
+
3
+ module SequenceLogo
4
+ class VerticalGluingCanvas < GluingCanvas
5
+ def add_image(image)
6
+ super
7
+ @i_logo.put_image_at(image, 0, y_size)
8
+ end
9
+
10
+ def x_size
11
+ @i_logo.to_a.map(&:columns).max || 0
12
+ end
13
+
14
+ def y_size
15
+ @i_logo.to_a.map(&:rows).inject(0, :+)
16
+ end
17
+ end
18
+ end
@@ -1,5 +1,7 @@
1
1
  require 'fileutils'
2
2
  require 'optparse'
3
+ require_relative 'support'
4
+
3
5
  module SequenceLogo
4
6
  class CLI
5
7
  attr_reader :options
@@ -34,4 +36,4 @@ module SequenceLogo
34
36
  end
35
37
  end
36
38
  end
37
- end
39
+ end
@@ -0,0 +1,4 @@
1
+ require_relative 'data_models/sequence'
2
+ require_relative 'data_models/sequence_with_snp'
3
+ require_relative 'data_models/ppm_logo'
4
+ require_relative 'data_models/predefined_logo'
@@ -0,0 +1,63 @@
1
+ require_relative '../canvases'
2
+
3
+ module SequenceLogo
4
+ # wrapper around PPM to make it possible to configure rendering in a flexible way
5
+ class PPMLogo
6
+ attr_reader :ppm, :words_count, :icd_mode, :enable_threshold_lines
7
+
8
+ def initialize(ppm, options = {})
9
+ @ppm = ppm
10
+ @words_count = options[:words_count]
11
+ @icd_mode = options[:icd_mode]
12
+ @enable_threshold_lines = options[:enable_threshold_lines]
13
+
14
+ @ppm.words_count = @words_count if @words_count
15
+ unless ppm.words_count
16
+ report "words count for PPM is undefined, assuming weblogo mode"
17
+ @icd_mode = :weblogo
18
+ end
19
+ end
20
+
21
+ def length
22
+ ppm.length
23
+ end
24
+
25
+ def name
26
+ ppm.name
27
+ end
28
+
29
+ def revcomp
30
+ PPMLogo.new(ppm.revcomp, words_count: words_count, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
31
+ end
32
+
33
+ def logo_matrix
34
+ ppm.get_logo(icd_mode)
35
+ end
36
+
37
+ def render(canvas_factory)
38
+ canvas = LogoCanvas.new(canvas_factory)
39
+ if icd_mode == :discrete
40
+ canvas.background(Magick::HatchFill.new('white', 'white'))
41
+ if enable_threshold_lines
42
+ canvas.draw_threshold_line(ppm.get_line(ppm.icd2of4))
43
+ canvas.draw_threshold_line(ppm.get_line(ppm.icdThc))
44
+ canvas.draw_threshold_line(ppm.get_line(ppm.icdTlc))
45
+ end
46
+ else
47
+ canvas.background(Magick::HatchFill.new('white', 'bisque'))
48
+ end
49
+
50
+ logo_matrix.each do |position|
51
+ canvas.add_position_ordered( position_sorted_by_height(position) )
52
+ end
53
+ canvas.image
54
+ end
55
+
56
+ # [3,1,1,2] ==> [[3, 0],[2, 3],[1, 1],[1, 2]] (derived from [[3, 'A'],[2,'T'],[1,'C'],[1,'G']])
57
+ def position_sorted_by_height(position)
58
+ # sort by [count, letter_index] allows us to make stable sort by count (it's useful for predictable order of same-height nucleotides)
59
+ position.each_with_index.sort_by{|count, letter_index| [count, letter_index] }.reverse
60
+ end
61
+ private :position_sorted_by_height
62
+ end
63
+ end
@@ -0,0 +1,29 @@
1
+ module SequenceLogo
2
+ class PredefinedLogo
3
+ attr_reader :direct_image, :reverse_image
4
+ def initialize(options = {})
5
+ @direct_image = options[:direct_image]
6
+ @reverse_image = options[:reverse_image]
7
+ @name = options[:name]
8
+ @length = options[:length]
9
+ end
10
+
11
+ def length
12
+ raise 'Length not defined' unless @length
13
+ @length
14
+ end
15
+
16
+ def name
17
+ raise 'Name not defined' unless @name
18
+ @name
19
+ end
20
+
21
+ def revcomp
22
+ PredefinedLogo.new(direct_image: @reverse_image, reverse_image: @direct_image, name: @name, length: @length)
23
+ end
24
+
25
+ def render(canvas_factory)
26
+ @direct_image
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,40 @@
1
+ require_relative '../canvases'
2
+
3
+ module SequenceLogo
4
+ class Sequence
5
+ attr_reader :sequence, :name
6
+ def initialize(sequence, options = {})
7
+ raise 'Wrong sequence' unless Sequence.valid_sequence?(sequence)
8
+ @sequence = sequence
9
+ @name = options[:name] || sequence
10
+ end
11
+
12
+ def length
13
+ sequence.length
14
+ end
15
+
16
+ def revcomp
17
+ Sequence.new(Sequence.revcomp(sequence), name: name)
18
+ end
19
+
20
+ def render(canvas_factory)
21
+ canvas = LogoCanvas.new(canvas_factory)
22
+ canvas.background(Magick::HatchFill.new('white', 'white'))
23
+ sequence.each_char do |letter|
24
+ canvas.add_letter(letter)
25
+ end
26
+ canvas.image
27
+ end
28
+
29
+ def self.complement(sequence)
30
+ sequence.tr('acgtACGT', 'tgcaTGCA')
31
+ end
32
+ def self.revcomp(sequence)
33
+ complement(sequence).reverse
34
+ end
35
+
36
+ def self.valid_sequence?(sequence)
37
+ sequence.match /\A[acgt]+\z/i
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,45 @@
1
+ require_relative 'sequence'
2
+ require_relative '../canvases'
3
+
4
+ module SequenceLogo
5
+ class SequenceWithSNP
6
+ attr_reader :left, :allele_variants, :right, :name
7
+ def initialize(left, allele_variants, right, options = {})
8
+ raise unless Sequence.valid_sequence?(left)
9
+ raise unless Sequence.valid_sequence?(right)
10
+ raise unless allele_variants.all?{|letter| %w[A C G T].include?(letter.upcase) }
11
+ @left, @allele_variants, @right = left, allele_variants, right
12
+ @name = options[:name] || (left + '_' + allele_variants.join('-') + '_' + right)
13
+ end
14
+
15
+ def self.from_string(sequence, options = {})
16
+ left, mid, right = sequence.split(/[\[\]]/)
17
+ allele_variants = mid.split('/')
18
+ SequenceWithSNP.new(left, allele_variants, right, options)
19
+ end
20
+
21
+ def length
22
+ left.length + 1 + right.length
23
+ end
24
+
25
+ def revcomp
26
+ SequenceWithSNP.new(Sequence.revcomp(right),
27
+ allele_variants.map{|letter| Sequence.complement(letter) },
28
+ Sequence.revcomp(left))
29
+ end
30
+
31
+ def render(canvas_factory)
32
+ canvas = LogoCanvas.new(canvas_factory)
33
+ canvas.background(Magick::HatchFill.new('white', 'white'))
34
+ left.each_char{|letter| canvas.add_letter(letter) }
35
+ canvas.add_position_ordered(snp_position_heights)
36
+ right.each_char{|letter| canvas.add_letter(letter) }
37
+ canvas.image
38
+ end
39
+
40
+ def snp_position_heights
41
+ allele_variants.map{|letter| [1.0 / allele_variants.size, letter] }
42
+ end
43
+ private :snp_position_heights
44
+ end
45
+ end
@@ -1,43 +1,64 @@
1
1
  require_relative '../../sequence_logo'
2
2
  require 'fileutils'
3
- require 'cgi'
4
3
  require 'tempfile'
5
4
 
6
- def generate_glued_logo(alignment_infos, options, total_orientation, output_file)
7
- logos = {}
8
- logo_files = []
9
- rightmost_side = alignment_infos.map do |line|
5
+ def load_alignment_infos(alignment_lines)
6
+ alignment_lines.map{|line|
10
7
  filename, shift, orientation, motif_name = line.strip.split("\t")
8
+ motif_name ||= File.basename(filename, File.extname(filename))
11
9
  shift = shift.to_i
12
- shift + get_ppm_from_file(filename).length
13
- end.max
10
+ orientation = orientation.downcase.to_sym
14
11
 
15
- alignment_infos.each do |line|
16
- filename, shift, orientation, motif_name = line.strip.split("\t")
17
- motif_name ||= CGI.unescape(File.basename(filename, File.extname(filename)))
18
12
  ppm = get_ppm_from_file(filename)
19
- shift = shift.to_i
20
- raise 'Unknown orientation' unless %w[direct revcomp].include?(orientation.downcase)
21
- if total_orientation == :revcomp
22
- orientation = (orientation == 'direct') ? 'revcomp' : 'direct'
23
- shift = rightmost_side - shift - ppm.length
24
- end
25
13
  checkerr("bad input file: #{filename}") { ppm == nil }
26
- logo_file = Tempfile.new(filename)
27
- logo_files << logo_file
28
- case orientation
29
- when 'direct'
30
- SequenceLogo.draw_logo(ppm, options).write("PNG:#{logo_file.path}")
31
- when 'revcomp'
32
- SequenceLogo.draw_logo(ppm.revcomp, options).write("PNG:#{logo_file.path}")
33
- else
34
- raise "Unknown orientation #{orientation} for #{motif_name}"
35
- end
36
- logos[logo_file.path] = {shift: shift, length: ppm.length, name: motif_name}
14
+ ppm.name ||= motif_name
15
+
16
+ raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
17
+
18
+ ppm_oriented = (orientation == :direct) ? ppm : ppm.revcomp
19
+ {motif: ppm_oriented, shift: shift}
20
+ }
21
+ end
22
+
23
+ def make_logo_alignment(aligned_motifs, options)
24
+ alignment = SequenceLogo::Alignment.new
25
+ aligned_motifs.map {|motif_infos|
26
+ ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
27
+ icd_mode: options[:icd_mode],
28
+ words_count: options[:words_count],
29
+ enable_threshold_lines: options[:threshold_lines])
30
+ alignment += SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
31
+ }
32
+ alignment
33
+ end
34
+
35
+ def readlines_from_file_or_stdin(argv, options = {})
36
+ default_options = { source_not_given_msg: 'Specify input data',
37
+ both_sources_given_msg: 'Specify either file with data or data itself in stdin, not both'}
38
+ options = default_options.merge(options)
39
+ raise options[:both_sources_given_msg] if !argv.empty? && !$stdin.tty?
40
+ if !argv.empty?
41
+ lines = File.readlines(argv.first)
42
+ elsif !$stdin.tty?
43
+ lines = $stdin.readlines
44
+ else
45
+ raise ArgumentError, options[:source_not_given_msg]
37
46
  end
47
+ lines
48
+ end
38
49
 
39
- SequenceLogo.glue_files(logos, output_file, options)
40
- logo_files.each(&:close)
50
+ def direct_output_filename(output_file)
51
+ extname = File.extname(output_file)
52
+ basename = File.basename_wo_extname(output_file)
53
+ dirname = File.dirname(output_file)
54
+ File.join(dirname, "#{basename}_direct#{extname}")
55
+ end
56
+
57
+ def reverse_output_filename(output_file)
58
+ extname = File.extname(output_file)
59
+ basename = File.basename_wo_extname(output_file)
60
+ dirname = File.dirname(output_file)
61
+ File.join(dirname, "#{basename}_revcomp#{extname}")
41
62
  end
42
63
 
43
64
  begin
@@ -57,7 +78,7 @@ begin
57
78
 
58
79
  argv = ARGV
59
80
  total_orientation = :direct
60
- default_options = {x_unit: 30, y_unit: 60, words_count: nil, icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa', logo_shift: 300, text_size_pt: 24}
81
+ default_options = {x_unit: 30, y_unit: 60, words_count: nil, icd_mode: :discrete, threshold_lines: false, scheme: 'nucl_simpa', logo_shift: 300, text_size: 24}
61
82
  cli = SequenceLogo::CLI.new(default_options)
62
83
  cli.instance_eval do
63
84
  parser.banner = doc
@@ -78,25 +99,25 @@ begin
78
99
  output_file = argv.shift
79
100
  raise ArgumentError, 'Specify output file' unless output_file
80
101
 
81
- raise 'You can specify alignment infos either from file or from stdin. Don\'t use both sources simultaneously' if !ARGV.empty? && !$stdin.tty?
82
- if !ARGV.empty?
83
- alignment_infos = File.readlines(ARGV.shift)
84
- elsif !$stdin.tty?
85
- alignment_infos = $stdin.readlines
86
- else
87
- raise ArgumentError, 'Specify alignment infos'
88
- end
102
+ alignment_lines = readlines_from_file_or_stdin(argv, source_not_given_msg: 'Specify alignment infos',
103
+ both_sources_given_msg: 'You can specify alignment infos either from file or from stdin. Don\'t use both sources simultaneously')
104
+ alignment = make_logo_alignment(load_alignment_infos(alignment_lines), options)
89
105
 
90
- if total_orientation == :both
91
- extname = File.extname(output_file)
92
- basename = File.basename(output_file, extname)
93
- dirname = File.dirname(output_file)
94
- generate_glued_logo(alignment_infos, options, :direct, File.join(dirname, "#{basename}_direct.#{extname}"))
95
- generate_glued_logo(alignment_infos, options, :revcomp, File.join(dirname, "#{basename}_revcomp.#{extname}"))
96
- else
97
- generate_glued_logo(alignment_infos, options, total_orientation, output_file)
106
+ scheme_dir = File.join(SequenceLogo::AssetsPath, options[:scheme])
107
+ letter_images = SequenceLogo::CanvasFactory.letter_images(scheme_dir)
108
+ canvas_factory = SequenceLogo::CanvasFactory.new(letter_images, x_unit: options[:x_unit], y_unit: options[:y_unit],
109
+ text_size: options[:text_size], logo_shift: options[:logo_shift])
110
+
111
+ case total_orientation
112
+ when :direct
113
+ alignment.render(canvas_factory).write('PNG:' + output_file)
114
+ when :revcomp
115
+ alignment.revcomp.render(canvas_factory).write('PNG:' + output_file)
116
+ when :both
117
+ alignment.render(canvas_factory).write('PNG:' + direct_output_filename(output_file))
118
+ alignment.revcomp.render(canvas_factory).write('PNG:' + reverse_output_filename(output_file))
98
119
  end
99
120
 
100
121
  rescue => err
101
122
  $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
102
- end
123
+ end
@@ -1,17 +1,50 @@
1
1
  require_relative '../../sequence_logo'
2
2
  require 'shellwords'
3
3
 
4
+ # [{renderable: , name: }] --> [{renderable: , filename: }]
5
+ def in_necessary_orientations(objects_to_render, orientation, logo_folder)
6
+ objects_to_render.map do |infos|
7
+ case orientation
8
+ when :direct
9
+ {renderable: infos[:renderable], filename: File.join(logo_folder, "#{infos[:name]}.png") }
10
+ when :revcomp
11
+ {renderable: infos[:renderable].revcomp, filename: File.join(logo_folder, "#{infos[:name]}.png") }
12
+ when :both
13
+ [ {renderable: infos[:renderable], filename: File.join(logo_folder, "#{infos[:name]}_direct.png") },
14
+ {renderable: infos[:renderable].revcomp, filename: File.join(logo_folder, "#{infos[:name]}_revcomp.png") } ]
15
+ end
16
+ end.flatten
17
+ end
18
+
19
+ def arglist_augmented_with_stdin(argv)
20
+ result = argv
21
+ result += $stdin.read.shellsplit unless $stdin.tty?
22
+ result
23
+ end
24
+
4
25
  begin
26
+ include SequenceLogo
27
+
5
28
  doc = <<-EOS
6
- sequence_logo is a tool for drawing motif logos. It is able to process PCM files either as a position matrix (*.pat or *.pcm), or in FASTA format (file extensions: .mfa, .fasta, .plain), or in SMall BiSMark format (.xml), or in IUPAC format (any other extension).
29
+ sequence_logo is a tool for drawing motif and sequence logos
30
+ It is able to process
31
+ - PCM / PPM format i.e. position count/frequency matrix (*.pat or *.pcm) - preferable
32
+ - FASTA format (file extensions: .mfa, .fasta, .plain)
33
+ - SMall BiSMark format (.xml)
34
+ - IUPAC format (any other extension)
7
35
  Usage:
8
- sequence_logo [options] <pcm/ppm file>...
36
+ sequence_logo [options] <motif file>...
9
37
  or
10
38
  ls pcm_folder/*.pcm | sequence_logo [options]
39
+ or
40
+ sequence_logo --sequence <sequence>...
41
+ or
42
+ sequence_logo --snp-sequence <sequence with SNP>...
43
+
11
44
  EOS
12
45
 
13
46
  argv = ARGV
14
- default_options = {x_unit: 30, y_unit: 60, words_count: nil, orientation: :both, logo_folder: '.', icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa'}
47
+ default_options = {x_unit: 30, y_unit: 60, words_count: nil, orientation: :direct, logo_folder: '.', icd_mode: :discrete, threshold_lines: true, scheme: 'nucl_simpa'}
15
48
  cli = SequenceLogo::CLI.new(default_options)
16
49
  cli.instance_eval do
17
50
  parser.banner = doc
@@ -23,30 +56,61 @@ begin
23
56
  raise ArgumentError, 'Orientation can be either direct or revcomp or both' unless [:direct, :revcomp, :both].include?(v)
24
57
  options[:orientation] = v
25
58
  end
59
+
60
+ parser.on('--snp-sequence', 'Specify sequences with SNP (like ATCTC[C/G]CCTAAT) instead of motif filenames') do
61
+ options[:sequence_w_snp] = true
62
+ end
63
+ parser.on('--sequence', 'Specify sequence (like ATCTCGCCTAAT) instead of motif filenames') do
64
+ options[:sequence] = true
65
+ end
26
66
  end
27
67
  options = cli.parse_options!(argv)
28
68
 
29
69
  logo_folder = options[:logo_folder]
30
70
  Dir.mkdir(logo_folder) unless Dir.exist?(logo_folder)
31
71
 
32
- filenames = argv
33
- filenames += $stdin.read.shellsplit unless $stdin.tty?
34
- raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
72
+ scheme_dir = File.join(SequenceLogo::AssetsPath, options[:scheme])
73
+ letter_images = SequenceLogo::CanvasFactory.letter_images(scheme_dir)
74
+ canvas_factory = SequenceLogo::CanvasFactory.new(letter_images, x_unit: options[:x_unit], y_unit: options[:y_unit])
75
+
76
+ raise "Specify either sequence or sequence with SNP or none of them, but not both" if options[:sequence] && options[:sequence_w_snp]
35
77
 
36
- filenames.each do |filename|
37
- ppm = get_ppm_from_file(filename)
38
- checkerr("bad input file: #{filename}") { ppm == nil }
78
+ objects_to_render = []
79
+ if options[:sequence]
80
+ sequences = arglist_augmented_with_stdin(argv)
81
+ raise ArgumentError, 'Specify at least one sequence' if sequences.empty?
82
+
83
+ sequences.each do |sequence|
84
+ objects_to_render << {renderable: SequenceLogo::Sequence.new(sequence),
85
+ name: File.join(logo_folder, sequence)}
86
+ end
87
+ elsif options[:sequence_w_snp]
88
+ sequences = arglist_augmented_with_stdin(argv)
89
+ raise ArgumentError, 'Specify at least one sequence' if sequences.empty?
39
90
 
40
- filename_wo_ext = File.basename(filename, File.extname(filename))
41
- if [:direct, :both].include?(options[:orientation])
42
- direct_output = File.join(logo_folder, "#{filename_wo_ext}_direct.png")
43
- SequenceLogo.draw_logo(ppm, options).write(direct_output)
91
+ sequences.each do |sequence_w_snp|
92
+ objects_to_render << {renderable: SequenceLogo::SequenceWithSNP.from_string(sequence_w_snp),
93
+ name: File.join(logo_folder, sequence_w_snp.gsub(/[\[\]\/]/, '_'))}
44
94
  end
45
- if [:revcomp, :both].include?(options[:orientation])
46
- revcomp_output = File.join(logo_folder, "#{filename_wo_ext}_revcomp.png")
47
- SequenceLogo.draw_logo(ppm.revcomp, options).write(revcomp_output)
95
+ else
96
+ filenames = arglist_augmented_with_stdin(argv)
97
+ raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
98
+
99
+ filenames.each do |filename|
100
+ ppm = get_ppm_from_file(filename)
101
+ checkerr("bad input file: #{filename}") { ppm == nil }
102
+
103
+ logo = SequenceLogo::PPMLogo.new( ppm,
104
+ icd_mode: options[:icd_mode],
105
+ words_count: options[:words_count],
106
+ enable_threshold_lines: options[:threshold_lines])
107
+ objects_to_render << {renderable: logo, name: File.join(logo_folder, File.basename_wo_extname(filename))}
48
108
  end
49
109
  end
110
+
111
+ in_necessary_orientations(objects_to_render, options[:orientation], logo_folder).each do |infos|
112
+ infos[:renderable].render(canvas_factory).write("PNG:#{infos[:filename]}")
113
+ end
50
114
  rescue => err
51
115
  $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
52
- end
116
+ end
@@ -0,0 +1,14 @@
1
+ require 'RMagick'
2
+
3
+ class Magick::ImageList
4
+ def put_image_at(image, x, y)
5
+ self << image
6
+ cur_image.page = Magick::Rectangle.new(0, 0, x, y)
7
+ end
8
+
9
+ # add transparent layer so that full canvas size can't be less than given size
10
+ def set_minimal_size(x_size, y_size)
11
+ empty_image = Magick::Image.new(x_size, y_size){ self.background_color = 'transparent'}
12
+ self.unshift(empty_image)
13
+ end
14
+ end
@@ -1,114 +1,5 @@
1
+ require_relative 'canvases'
2
+ require_relative 'canvas_factory'
3
+ require_relative 'alignment'
4
+ require_relative 'data_models'
1
5
  require_relative 'ytilib'
2
- require 'RMagick'
3
-
4
- module SequenceLogo
5
- def self.draw_threshold_lines(i_logo, ppm)
6
- x_size = i_logo.columns
7
- y_size = i_logo.rows
8
-
9
- line2of4 = y_size - ppm.get_line(ppm.icd2of4) * y_size
10
- lineThc = y_size - ppm.get_line(ppm.icdThc) * y_size
11
- lineTlc = y_size - ppm.get_line(ppm.icdTlc) * y_size
12
-
13
- dr = Magick::Draw.new
14
- dr.fill('transparent')
15
-
16
- dr.stroke_width(y_size / 200.0)
17
- dr.stroke_dasharray(7,7)
18
-
19
- dr.stroke('silver')
20
- dr.line(0, line2of4, x_size, line2of4)
21
- dr.line(0, lineThc, x_size, lineThc)
22
- dr.line(0, lineTlc, x_size, lineTlc)
23
-
24
- dr.draw(i_logo)
25
- end
26
-
27
- def self.create_canvas(ppm, options)
28
- x_size = options[:x_unit] * ppm.length
29
- y_size = options[:y_unit]
30
-
31
- i_logo = Magick::ImageList.new
32
- if options[:icd_mode] == :discrete
33
- i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'white'))
34
- draw_threshold_lines(i_logo, ppm) if options[:threshold_lines]
35
- else
36
- i_logo.new_image(x_size, y_size, Magick::HatchFill.new('white', 'bisque'))
37
- end
38
-
39
- i_logo
40
- end
41
-
42
- def self.letter_images(scheme_dir)
43
- if File.exist?(File.join(scheme_dir,'a.png'))
44
- extension = 'png'
45
- elsif File.exist?(File.join(scheme_dir,'a.gif'))
46
- extension = 'gif'
47
- else
48
- raise "Scheme not exists in folder #{scheme_dir}"
49
- end
50
-
51
- letter_files = %w[a c g t].collect{|letter| File.join(scheme_dir, "#{letter}.#{extension}") }
52
- Magick::ImageList.new(*letter_files)
53
- end
54
-
55
- def self.draw_letters_on_canvas(i_logo, i_letters, ppm, options)
56
- y_unit = options[:y_unit]
57
- x_unit = options[:x_unit]
58
- matrix = ppm.get_logo(options[:icd_mode])
59
- matrix['A'].each_index { |i|
60
- y_pos = 0
61
- sorted_letters = ['A', 'C', 'G', 'T'].collect { |letter| {:score => matrix[letter][i], :letter => letter} }.sort_by { |pair| pair[:score] }.collect { |pair| pair[:letter] }.reverse
62
- sorted_letters.each { |letter|
63
- next if y_unit * matrix[letter][i] <= 1
64
- letter_index = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}[letter]
65
- y_block = (y_unit * matrix[letter][i]).round
66
- i_logo << i_letters[letter_index].dup.resize(x_unit, y_block)
67
- y_pos += y_block
68
- i_logo.cur_image.page = Magick::Rectangle.new(0, 0, i * x_unit, y_unit - y_pos )
69
- }
70
- }
71
- end
72
-
73
- def self.draw_logo(ppm, options = {})
74
- ppm.words_count = options[:words_count] if options[:words_count]
75
- unless ppm.words_count
76
- report "words count for PPM is undefined, assuming weblogo mode"
77
- options[:icd_mode] = :weblogo
78
- end
79
- i_logo = create_canvas(ppm, options)
80
- scheme_dir = File.join(AssetsPath, options[:scheme])
81
- draw_letters_on_canvas(i_logo, letter_images(scheme_dir), ppm, options)
82
- i_logo = i_logo.flatten_images
83
- end
84
-
85
- # logos = { filename => {shift: ..., length: ..., name: ...} }
86
- def self.glue_files(logos, output_file, options)
87
- logo_shift = options[:logo_shift] || 300
88
- x_unit = options[:x_unit] || 30
89
- y_unit = options[:y_unit] || 60
90
- text_size = options[:text_size] || 24
91
-
92
- leftmost_shift = logos.map{|file,infos| infos[:shift] }.min
93
- logos.each{|file, infos| infos[:shift] -= leftmost_shift}
94
- full_alignment_size = logos.map{|file,infos| infos[:length] + infos[:shift] }.max
95
-
96
- x_size = logo_shift + full_alignment_size * x_unit
97
- y_size = logos.size * y_unit
98
- command_string = "convert -size #{ x_size }x#{ y_size } -pointsize #{text_size} xc:white "
99
- logos.each_with_index do |(logo_filename,infos), idx|
100
- logo_x_start = logo_shift + infos[:shift] * x_unit
101
- logo_y_start = y_unit * idx
102
- command_string << "\"#{ logo_filename }\" -geometry +#{ logo_x_start }+#{ logo_y_start } -composite "
103
- end
104
-
105
- command_draw_names = ""
106
- logos.each_with_index do |(logo_filename,infos), idx|
107
- text_x_start = 10
108
- text_y_start = y_unit * (idx + 0.5)
109
- command_draw_names << "-draw \"text #{ text_x_start },#{ text_y_start } '#{infos[:name]}'\" "
110
- end
111
-
112
- system(command_string + command_draw_names + "\"#{output_file}\"")
113
- end
114
- end
@@ -0,0 +1,5 @@
1
+ class File
2
+ def self.basename_wo_extname(filename)
3
+ File.basename(filename, File.extname(filename))
4
+ end
5
+ end
@@ -1,3 +1,3 @@
1
1
  module SequenceLogo
2
- VERSION = "1.0.6"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -7,4 +7,4 @@ require_relative 'ytilib/randoom'
7
7
  require_relative 'ytilib/bismark'
8
8
  require_relative 'ytilib/hack1'
9
9
  require_relative 'ytilib/infocod'
10
- require_relative 'ytilib/ppm_support'
10
+ require_relative 'ytilib/ppm_support'
@@ -5,6 +5,18 @@ module Ytilib
5
5
  attr_accessor :words_count
6
6
 
7
7
  alias length size
8
+
9
+ def each_position_index(&block)
10
+ @matrix['A'].each_index(&block)
11
+ end
12
+
13
+ def each_position(&block)
14
+ return enum_for(:each_position) unless block_given?
15
+ @matrix['A'].each_index do |i|
16
+ position = ['A', 'C', 'G', 'T'].map{|letter| @matrix[letter][i] }
17
+ yield position
18
+ end
19
+ end
8
20
 
9
21
  def score_mean(bckgr = Randoom::DEF_PROBS)
10
22
  (0...@size).inject(0.0) { |mean, i| mean += ['A','C','G','T'].inject(0.0) { |sum,l| sum += @matrix[l][i] * bckgr[l] } }
@@ -324,12 +336,12 @@ module Ytilib
324
336
  attributes = {"length" => @size}
325
337
  attributes["words-count"] = @words_count if @words_count && @words_count > 0
326
338
  pe = b.add_element( pwm ? "PWM" : "PCM", attributes )
327
- (0...@matrix['A'].size).each { |i|
339
+ each_position_index do |i|
328
340
  pm_c = pe.add_element("pm-column", {"position" => i+1})
329
341
  ['A', 'C', 'G', 'T'].each { |l|
330
342
  pm_c.add_element(l.downcase).add_text(@matrix[l][i].to_s)
331
343
  }
332
- }
344
+ end
333
345
  end
334
346
 
335
347
  def PM.from_bismark(b, iupacomp = false)
@@ -43,40 +43,25 @@ class PPM
43
43
 
44
44
 
45
45
  def get_logo_weblogo
46
- rseq = []
47
- @matrix['A'].each_index { |i|
48
- rseq << 2 + ['A','C','G','T'].inject(0) { |sum, l|
49
- pn = @matrix[l][i]
50
- sum += (pn == 0) ? 0 : pn * Math.log(pn) / Math.log(2)
51
- }
46
+ rseq = each_position.map {|position|
47
+ position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
52
48
  }
53
-
54
- mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
55
- @matrix['A'].each_index { |i|
56
- ['A','C','G','T'].each { |l|
57
- mat[l][i]= @matrix[l][i] * rseq[i] / 2 # so we can handle a '2 bit' scale here
58
- }
49
+
50
+ each_position.with_index.map {|position, ind|
51
+ position.map{|el| el * rseq[ind] / 2 }
59
52
  }
60
-
61
- mat
62
53
  end
63
54
 
64
55
  def get_logo_discrete
65
56
  checkerr("words count is undefined") { !words_count }
66
57
 
67
- rseq = []
68
- @matrix['A'].each_index { |i|
69
- rseq << (icd4of4 == 0 ? 1.0 : ( (infocod(i) - icd4of4) / icd4of4 ).abs)
58
+ rseq = each_position_index.map {|i|
59
+ (icd4of4 == 0) ? 1.0 : get_line(infocod(i))
70
60
  }
71
-
72
- mat = {'A'=>[], 'C'=>[], 'G'=>[], 'T'=>[]}
73
- @matrix['A'].each_index { |i|
74
- ['A','C','G','T'].each { |l|
75
- mat[l][i] = @matrix[l][i] * rseq[i]
76
- }
61
+
62
+ each_position.with_index.map {|position, ind|
63
+ position.map{|el| el * rseq[ind] }
77
64
  }
78
-
79
- mat
80
65
  end
81
66
 
82
67
  def revcomp
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequence_logo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-21 00:00:00.000000000 Z
11
+ date: 2014-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmagick
@@ -44,14 +44,32 @@ files:
44
44
  - bin/glue_logos
45
45
  - bin/sequence_logo
46
46
  - lib/sequence_logo.rb
47
+ - lib/sequence_logo/alignment.rb
47
48
  - lib/sequence_logo/assets/nucl_simpa/a.png
48
49
  - lib/sequence_logo/assets/nucl_simpa/c.png
49
50
  - lib/sequence_logo/assets/nucl_simpa/g.png
50
51
  - lib/sequence_logo/assets/nucl_simpa/t.png
52
+ - lib/sequence_logo/assets/nucl_simpa_bw/a.png
53
+ - lib/sequence_logo/assets/nucl_simpa_bw/c.png
54
+ - lib/sequence_logo/assets/nucl_simpa_bw/g.png
55
+ - lib/sequence_logo/assets/nucl_simpa_bw/t.png
56
+ - lib/sequence_logo/canvas_factory.rb
57
+ - lib/sequence_logo/canvases.rb
58
+ - lib/sequence_logo/canvases/gluing_canvas.rb
59
+ - lib/sequence_logo/canvases/horizontal_gluing_canvas.rb
60
+ - lib/sequence_logo/canvases/logo_canvas.rb
61
+ - lib/sequence_logo/canvases/vertical_gluing_canvas.rb
51
62
  - lib/sequence_logo/cli.rb
63
+ - lib/sequence_logo/data_models.rb
64
+ - lib/sequence_logo/data_models/ppm_logo.rb
65
+ - lib/sequence_logo/data_models/predefined_logo.rb
66
+ - lib/sequence_logo/data_models/sequence.rb
67
+ - lib/sequence_logo/data_models/sequence_with_snp.rb
52
68
  - lib/sequence_logo/exec/glue_logos.rb
53
69
  - lib/sequence_logo/exec/sequence_logo.rb
70
+ - lib/sequence_logo/magick_support.rb
54
71
  - lib/sequence_logo/pmflogo_lib.rb
72
+ - lib/sequence_logo/support.rb
55
73
  - lib/sequence_logo/version.rb
56
74
  - lib/sequence_logo/ytilib.rb
57
75
  - lib/sequence_logo/ytilib/addon.rb
@@ -90,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
108
  version: '0'
91
109
  requirements: []
92
110
  rubyforge_project:
93
- rubygems_version: 2.2.1
111
+ rubygems_version: 2.2.2
94
112
  signing_key:
95
113
  specification_version: 4
96
114
  summary: Tool for drawing sequence logos of motifs