sequence_logo 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a53d6d681797aec870766358a55b460aa10a87f1
4
- data.tar.gz: 43c4712e2b1bbef50ac8de24626c115c5492dfaf
3
+ metadata.gz: 6f1c64cbd02b8379d849f555d8aebacffc885886
4
+ data.tar.gz: b2d2189f7f72a60faa86978d997ab8a240604e45
5
5
  SHA512:
6
- metadata.gz: 4331c46e148cb21342e7cb5d39ab635ff633086974a3e87d20a8d1efd1534f5590d6df7ccd50f0a5e7eea1d6222fc8a03709a2200f90432083a63a016bf3f9fa
7
- data.tar.gz: 7b1013ed36a55848884760f55bb2418789e98fd9cd040e846a8748360889c3bcf34d0a92d520b3e034d501eb21ec2df3948a5d3be41b1b1f22aad699a7546e3b
6
+ metadata.gz: 900ab873940c8646483a5b56b5596a2cdc606fda91941a33134f82b8e163a50a8dc00f6454a0a8608e78a236deb6b7393f6367e9298c55162befc2e1d2270851
7
+ data.tar.gz: 7503db8c78201db87ae1069deb9585e86721c4a7d8e0910debfa2c48fc46443411bca8036a72761462acc3cf94f1644092068a20c756185234bc983927569ffc
data/TODO.txt CHANGED
@@ -8,3 +8,5 @@ And how to make possible use schemes inside the assets path
8
8
  Make tests
9
9
 
10
10
  Wrap execs into methods
11
+
12
+ remove SolidFill class because it's in a new version of RMagick
@@ -1,4 +1,4 @@
1
- require 'RMagick'
1
+ require 'rmagick'
2
2
  require_relative '../magick_support'
3
3
  require_relative 'horizontal_gluing_canvas'
4
4
 
@@ -21,9 +21,6 @@ module SequenceLogo
21
21
  opts.on('-y', '--y-unit Y_UNIT', 'Base letter height') do |v|
22
22
  options[:y_unit] = v.to_i
23
23
  end
24
- opts.on('--words-count WEIGHT', 'Define alignment weight') do |v|
25
- options[:words_count] = v.to_f
26
- end
27
24
  opts.on('--icd-mode MODE', 'Calculation mode: discrete or weblogo', 'Weblogo is assumed if word count not given') do |v|
28
25
  options[:icd_mode] = v.to_sym
29
26
  raise ArgumentError, 'icd-mode can be either discrete or weblogo' unless [:discrete, :weblogo].include?(options[:icd_mode])
@@ -3,19 +3,12 @@ require_relative '../canvases'
3
3
  module SequenceLogo
4
4
  # wrapper around PPM to make it possible to configure rendering in a flexible way
5
5
  class PPMLogo
6
- attr_reader :ppm, :words_count, :icd_mode, :enable_threshold_lines
6
+ attr_reader :ppm, :icd_mode, :enable_threshold_lines
7
7
 
8
8
  def initialize(ppm, options = {})
9
9
  @ppm = ppm
10
- @words_count = options[:words_count]
11
10
  @icd_mode = options[:icd_mode]
12
11
  @enable_threshold_lines = options[:enable_threshold_lines]
13
-
14
- @ppm.words_count = @words_count if @words_count
15
- unless ppm.words_count
16
- report "words count for PPM is undefined, assuming weblogo mode"
17
- @icd_mode = :weblogo
18
- end
19
12
  end
20
13
 
21
14
  def length
@@ -27,7 +20,7 @@ module SequenceLogo
27
20
  end
28
21
 
29
22
  def revcomp
30
- PPMLogo.new(ppm.revcomp, words_count: words_count, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
23
+ PPMLogo.new(ppm.revcomp, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
31
24
  end
32
25
 
33
26
  def logo_matrix
@@ -37,10 +30,11 @@ module SequenceLogo
37
30
  def render(canvas_factory)
38
31
  canvas = LogoCanvas.new(canvas_factory)
39
32
  canvas.background(canvas_factory.background_fill)
33
+ word_count = ppm.each_position.map{|pos| pos.inject(0.0, &:+) }.max
40
34
  if icd_mode == :discrete && enable_threshold_lines
41
- canvas.draw_threshold_line(ppm.get_line(ppm.icd2of4))
42
- canvas.draw_threshold_line(ppm.get_line(ppm.icdThc))
43
- canvas.draw_threshold_line(ppm.get_line(ppm.icdTlc))
35
+ canvas.draw_threshold_line( scale(icd2of4(word_count), relative_to: icd4of4(word_count)) )
36
+ canvas.draw_threshold_line( scale(icdThc(word_count), relative_to: icd4of4(word_count)) )
37
+ canvas.draw_threshold_line( scale(icdTlc(word_count), relative_to: icd4of4(word_count)) )
44
38
  end
45
39
 
46
40
  logo_matrix.each do |position|
@@ -0,0 +1,99 @@
1
+ require 'bioinform'
2
+ require 'fileutils'
3
+
4
+ ##########
5
+ module Bioinform
6
+ module MotifModel
7
+ class DiPM # Doesn't work with alphabet
8
+
9
+ def self.from_file(filename)
10
+ parser = Bioinform::MatrixParser.new(fix_nucleotides_number: 16)
11
+ infos = parser.parse(File.read(filename))
12
+ name = infos[:name] || File.basename(filename, File.extname(filename))
13
+ pcm = self.new(infos[:matrix]).named(name)
14
+ end
15
+
16
+ attr_reader :matrix
17
+ def initialize(matrix)
18
+ @matrix = matrix
19
+ raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
20
+ end
21
+
22
+ def validation_errors
23
+ errors = []
24
+ errors << "matrix should be an Array" unless matrix.is_a? Array
25
+ errors << "matrix shouldn't be empty" unless matrix.size > 0
26
+ errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
27
+ errors << "each matrix position should be of size compatible with alphabet (=#{16})" unless matrix.all?{|pos| pos.size == 16 }
28
+ errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
29
+ errors
30
+ end
31
+ private :validation_errors
32
+
33
+ def valid?
34
+ validation_errors.empty?
35
+ rescue
36
+ false
37
+ end
38
+
39
+ private :valid?
40
+
41
+ def to_s
42
+ MotifFormatter.new.format(self)
43
+ end
44
+
45
+ def named(name)
46
+ NamedModel.new(self, name)
47
+ end
48
+
49
+ def length
50
+ matrix.size + 1
51
+ end
52
+
53
+ def ==(other)
54
+ self.class == other.class && matrix == other.matrix # alphabet should be considered (when alphabet implemented)
55
+ end
56
+
57
+ def each_position
58
+ if block_given?
59
+ matrix.each{|pos| yield pos}
60
+ else
61
+ self.to_enum(:each_position)
62
+ end
63
+ end
64
+
65
+ end
66
+
67
+ class DiPCM < DiPM
68
+
69
+ def sum_dependent_on_first_letter(pos)
70
+ mono_pos = Array.new(4, 0.0)
71
+ pos.each.with_index{|count, diletter|
72
+ first_letter = diletter / 4
73
+ mono_pos[first_letter] += count
74
+ }
75
+ mono_pos
76
+ end
77
+
78
+ def sum_dependent_on_second_letter(pos)
79
+ mono_pos = Array.new(4, 0.0)
80
+ pos.each.with_index{|count, diletter|
81
+ second_letter = diletter % 4
82
+ mono_pos[second_letter] += count
83
+ }
84
+ mono_pos
85
+ end
86
+
87
+ def to_mono
88
+ mono_matrix = each_position.map{|pos|
89
+ sum_dependent_on_first_letter(pos)
90
+ } + [ sum_dependent_on_second_letter(matrix.last) ]
91
+
92
+ PCM.new(mono_matrix)
93
+ end
94
+ end
95
+
96
+ class DiPWM < DiPM
97
+ end
98
+ end
99
+ end
@@ -9,8 +9,7 @@ def load_alignment_infos(alignment_lines)
9
9
  shift = shift.to_i
10
10
  orientation = orientation.downcase.to_sym
11
11
 
12
- ppm = get_ppm_from_file(filename)
13
- checkerr("bad input file: #{filename}") { ppm == nil }
12
+ ppm = Bioinform::MotifModel::PCM.from_file(filename)
14
13
  ppm.name ||= motif_name
15
14
 
16
15
  raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
@@ -25,7 +24,6 @@ def make_logo_alignment(aligned_motifs, options)
25
24
  aligned_motifs.map {|motif_infos|
26
25
  ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
27
26
  icd_mode: options[:icd_mode],
28
- words_count: options[:words_count],
29
27
  enable_threshold_lines: options[:threshold_lines])
30
28
  alignment += SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
31
29
  }
@@ -79,7 +77,7 @@ begin
79
77
  argv = ARGV
80
78
  total_orientation = :direct
81
79
  default_options = { x_unit: 30, y_unit: 60, logo_shift: 300, scheme: 'nucl_simpa',
82
- words_count: nil, icd_mode: :discrete, threshold_lines: false,
80
+ icd_mode: :discrete, threshold_lines: false,
83
81
  text_size: 24, background_color: 'white' }
84
82
  cli = SequenceLogo::CLI.new(default_options)
85
83
  cli.instance_eval do
@@ -45,8 +45,9 @@ begin
45
45
 
46
46
  argv = ARGV
47
47
  default_options = { x_unit: 30, y_unit: 60, scheme: 'nucl_simpa',
48
- words_count: nil, orientation: :direct, icd_mode: :discrete, threshold_lines: true,
49
- logo_folder: '.', background_color: 'white' }
48
+ orientation: :direct, icd_mode: :discrete, threshold_lines: true,
49
+ logo_folder: '.', background_color: 'white',
50
+ from_dinucleotide: false }
50
51
  cli = SequenceLogo::CLI.new(default_options)
51
52
  cli.instance_eval do
52
53
  parser.banner = doc
@@ -72,6 +73,8 @@ begin
72
73
  options[:background_fill] = Magick::SolidFill.new(v)
73
74
  end
74
75
  end
76
+
77
+ parser.on('--dinucleotide'){ options[:from_dinucleotide] = true }
75
78
  end
76
79
  options = cli.parse_options!(argv)
77
80
 
@@ -107,12 +110,14 @@ begin
107
110
  raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
108
111
 
109
112
  filenames.each do |filename|
110
- ppm = get_ppm_from_file(filename)
111
- checkerr("bad input file: #{filename}") { ppm == nil }
113
+ if options[:from_dinucleotide]
114
+ ppm = Bioinform::MotifModel::DiPCM.from_file(filename).to_mono
115
+ else
116
+ ppm = Bioinform::MotifModel::PCM.from_file(filename)
117
+ end
112
118
 
113
119
  logo = SequenceLogo::PPMLogo.new( ppm,
114
120
  icd_mode: options[:icd_mode],
115
- words_count: options[:words_count],
116
121
  enable_threshold_lines: options[:threshold_lines])
117
122
  objects_to_render << {renderable: logo, name: File.basename_wo_extname(filename)}
118
123
  end
@@ -1,4 +1,4 @@
1
- require 'RMagick'
1
+ require 'rmagick'
2
2
 
3
3
  module Magick
4
4
  class ImageList
@@ -2,4 +2,5 @@ require_relative 'canvases'
2
2
  require_relative 'canvas_factory'
3
3
  require_relative 'alignment'
4
4
  require_relative 'data_models'
5
- require_relative 'ytilib'
5
+ require_relative 'ppm_support'
6
+ require_relative 'di_pm'
@@ -0,0 +1,81 @@
1
+ require 'bioinform'
2
+
3
+ class Float
4
+ def log_fact
5
+ Math.lgamma(self + 1).first
6
+ end
7
+ end
8
+
9
+ class Integer
10
+ def log_fact
11
+ self.to_f.log_fact
12
+ end
13
+ end
14
+
15
+ def position_infocod(pos)
16
+ words_count = pos.inject(0.0, &:+)
17
+ ( pos.map(&:log_fact).inject(0.0, &:+) - words_count.log_fact ) / words_count
18
+ end
19
+
20
+ def icd4of4(words_count, floor: false)
21
+ i4o4 = words_count / 4.0
22
+ i4o4 = i4o4.floor if floor
23
+ position_infocod([i4o4, i4o4, i4o4, i4o4])
24
+ end
25
+
26
+ def icd2of4(words_count, floor: false)
27
+ i2o4 = words_count / 2.0
28
+ i2o4 = i2o4.floor if floor
29
+ position_infocod([i2o4, i2o4, 0, 0]) # 0 is equal to words_count % 2, because 0! = 1!
30
+ end
31
+
32
+ def icd3of4(words_count, floor: false)
33
+ i3o4 = words_count / 3.0
34
+ i3o4 = i3o4.floor if floor
35
+ addon = floor ? words_count % 3 : 0
36
+ position_infocod([i3o4, i3o4, i3o4, addon])
37
+ end
38
+
39
+ def icdThc(words_count, floor: false)
40
+ icd3of4(words_count, floor: floor)
41
+ end
42
+
43
+ def icdTlc(words_count, floor: false)
44
+ io = words_count / 6.0
45
+ io = io.floor if floor
46
+ position_infocod([2*io, 2*io, io, io])
47
+ end
48
+
49
+ def scale(value, relative_to:)
50
+ ( (value - relative_to) / relative_to ).abs
51
+ end
52
+
53
+ class Bioinform::MotifModel::PCM
54
+ def get_logo(icd_mode)
55
+ case icd_mode.to_s
56
+ when 'weblogo'
57
+ get_logo_weblogo
58
+ when 'discrete'
59
+ get_logo_discrete
60
+ end
61
+ end
62
+
63
+ def get_logo_weblogo
64
+ each_position.map{|position|
65
+ word_count = position.inject(0.0, &:+)
66
+ inf_content = position.map{|el|
67
+ (el == 0) ? 0 : (el / word_count) * Math.log2(el / word_count)
68
+ }.inject(0.0, :+) + 2
69
+ position.map{|el| (el / word_count) * inf_content / 2 }
70
+ }
71
+ end
72
+
73
+ def get_logo_discrete
74
+ each_position.map{|position|
75
+ word_count = position.inject(0.0, &:+)
76
+ icd4of4 = icd4of4(word_count)
77
+ inf_content = (icd4of4 == 0) ? 1.0 : scale(position_infocod(position), relative_to: icd4of4)
78
+ position.map{|el| (el / word_count) * inf_content }
79
+ }
80
+ end
81
+ end
@@ -1,3 +1,3 @@
1
1
  module SequenceLogo
2
- VERSION = "1.1.2"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,70 +0,0 @@
1
- class Object
2
- def deep_dup
3
- Marshal.load(Marshal.dump(self))
4
- end
5
- end
6
-
7
- def get_ppm_from_file(in_file_name)
8
- case File.ext_wo_name(in_file_name)
9
- when 'pat', 'pcm'
10
- pm = PM.load(in_file_name)
11
- pm.fixwc if pm.words_count
12
- when 'mfa', 'fasta', 'plain'
13
- pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
14
- when 'xml'
15
- pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
16
- when in_file_name
17
- pm = PPM.from_IUPAC(in_file_name.upcase)
18
- end
19
- pm.get_ppm
20
- rescue
21
- nil
22
- end
23
-
24
- class PPM
25
- attr_accessor :name
26
-
27
- def get_ppm
28
- self
29
- end
30
-
31
- def get_line(v)
32
- ( (v - icd4of4) / icd4of4 ).abs
33
- end
34
-
35
- def get_logo(icd_mode)
36
- case icd_mode.to_s
37
- when 'weblogo'
38
- get_logo_weblogo
39
- when 'discrete'
40
- get_logo_discrete
41
- end
42
- end
43
-
44
-
45
- def get_logo_weblogo
46
- rseq = each_position.map {|position|
47
- position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
48
- }
49
-
50
- each_position.with_index.map {|position, ind|
51
- position.map{|el| el * rseq[ind] / 2 }
52
- }
53
- end
54
-
55
- def get_logo_discrete
56
- checkerr("words count is undefined") { !words_count }
57
-
58
- rseq = each_position_index.map {|i|
59
- (icd4of4 == 0) ? 1.0 : get_line(infocod(i))
60
- }
61
-
62
- each_position.with_index.map {|position, ind|
63
- position.map{|el| el * rseq[ind] }
64
- }
65
- end
66
-
67
- def revcomp
68
- deep_dup.revcomp!
69
- end
70
- end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequence_logo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-05 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmagick
@@ -69,23 +69,15 @@ files:
69
69
  - lib/sequence_logo/data_models/predefined_logo.rb
70
70
  - lib/sequence_logo/data_models/sequence.rb
71
71
  - lib/sequence_logo/data_models/sequence_with_snp.rb
72
+ - lib/sequence_logo/di_pm.rb
72
73
  - lib/sequence_logo/exec/glue_logos.rb
73
74
  - lib/sequence_logo/exec/sequence_logo.rb
74
75
  - lib/sequence_logo/magick_support.rb
75
76
  - lib/sequence_logo/pmflogo_lib.rb
77
+ - lib/sequence_logo/ppm_support.rb
76
78
  - lib/sequence_logo/support.rb
77
79
  - lib/sequence_logo/version.rb
78
- - lib/sequence_logo/ytilib.rb
79
- - lib/sequence_logo/ytilib/addon.rb
80
- - lib/sequence_logo/ytilib/bismark.rb
81
- - lib/sequence_logo/ytilib/hack1.rb
82
- - lib/sequence_logo/ytilib/infocod.rb
83
- - lib/sequence_logo/ytilib/iupac.rb
84
- - lib/sequence_logo/ytilib/pm.rb
85
- - lib/sequence_logo/ytilib/pmsd.rb
86
80
  - lib/sequence_logo/ytilib/ppm_support.rb
87
- - lib/sequence_logo/ytilib/randoom.rb
88
- - lib/sequence_logo/ytilib/ytilib.rb
89
81
  - sequence_logo.gemspec
90
82
  - test/data/logo/AHR_si_direct.png
91
83
  - test/data/logo/AHR_si_revcomp.png
@@ -112,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
104
  version: '0'
113
105
  requirements: []
114
106
  rubyforge_project:
115
- rubygems_version: 2.2.2
107
+ rubygems_version: 2.4.6
116
108
  signing_key:
117
109
  specification_version: 4
118
110
  summary: Tool for drawing sequence logos of motifs