sequence_logo 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a53d6d681797aec870766358a55b460aa10a87f1
4
- data.tar.gz: 43c4712e2b1bbef50ac8de24626c115c5492dfaf
3
+ metadata.gz: 6f1c64cbd02b8379d849f555d8aebacffc885886
4
+ data.tar.gz: b2d2189f7f72a60faa86978d997ab8a240604e45
5
5
  SHA512:
6
- metadata.gz: 4331c46e148cb21342e7cb5d39ab635ff633086974a3e87d20a8d1efd1534f5590d6df7ccd50f0a5e7eea1d6222fc8a03709a2200f90432083a63a016bf3f9fa
7
- data.tar.gz: 7b1013ed36a55848884760f55bb2418789e98fd9cd040e846a8748360889c3bcf34d0a92d520b3e034d501eb21ec2df3948a5d3be41b1b1f22aad699a7546e3b
6
+ metadata.gz: 900ab873940c8646483a5b56b5596a2cdc606fda91941a33134f82b8e163a50a8dc00f6454a0a8608e78a236deb6b7393f6367e9298c55162befc2e1d2270851
7
+ data.tar.gz: 7503db8c78201db87ae1069deb9585e86721c4a7d8e0910debfa2c48fc46443411bca8036a72761462acc3cf94f1644092068a20c756185234bc983927569ffc
data/TODO.txt CHANGED
@@ -8,3 +8,5 @@ And how to make possible use schemes inside the assets path
8
8
  Make tests
9
9
 
10
10
  Wrap execs into methods
11
+
12
+ remove SolidFill class because it's in a new version of RMagick
@@ -1,4 +1,4 @@
1
- require 'RMagick'
1
+ require 'rmagick'
2
2
  require_relative '../magick_support'
3
3
  require_relative 'horizontal_gluing_canvas'
4
4
 
@@ -21,9 +21,6 @@ module SequenceLogo
21
21
  opts.on('-y', '--y-unit Y_UNIT', 'Base letter height') do |v|
22
22
  options[:y_unit] = v.to_i
23
23
  end
24
- opts.on('--words-count WEIGHT', 'Define alignment weight') do |v|
25
- options[:words_count] = v.to_f
26
- end
27
24
  opts.on('--icd-mode MODE', 'Calculation mode: discrete or weblogo', 'Weblogo is assumed if word count not given') do |v|
28
25
  options[:icd_mode] = v.to_sym
29
26
  raise ArgumentError, 'icd-mode can be either discrete or weblogo' unless [:discrete, :weblogo].include?(options[:icd_mode])
@@ -3,19 +3,12 @@ require_relative '../canvases'
3
3
  module SequenceLogo
4
4
  # wrapper around PPM to make it possible to configure rendering in a flexible way
5
5
  class PPMLogo
6
- attr_reader :ppm, :words_count, :icd_mode, :enable_threshold_lines
6
+ attr_reader :ppm, :icd_mode, :enable_threshold_lines
7
7
 
8
8
  def initialize(ppm, options = {})
9
9
  @ppm = ppm
10
- @words_count = options[:words_count]
11
10
  @icd_mode = options[:icd_mode]
12
11
  @enable_threshold_lines = options[:enable_threshold_lines]
13
-
14
- @ppm.words_count = @words_count if @words_count
15
- unless ppm.words_count
16
- report "words count for PPM is undefined, assuming weblogo mode"
17
- @icd_mode = :weblogo
18
- end
19
12
  end
20
13
 
21
14
  def length
@@ -27,7 +20,7 @@ module SequenceLogo
27
20
  end
28
21
 
29
22
  def revcomp
30
- PPMLogo.new(ppm.revcomp, words_count: words_count, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
23
+ PPMLogo.new(ppm.revcomp, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
31
24
  end
32
25
 
33
26
  def logo_matrix
@@ -37,10 +30,11 @@ module SequenceLogo
37
30
  def render(canvas_factory)
38
31
  canvas = LogoCanvas.new(canvas_factory)
39
32
  canvas.background(canvas_factory.background_fill)
33
+ word_count = ppm.each_position.map{|pos| pos.inject(0.0, &:+) }.max
40
34
  if icd_mode == :discrete && enable_threshold_lines
41
- canvas.draw_threshold_line(ppm.get_line(ppm.icd2of4))
42
- canvas.draw_threshold_line(ppm.get_line(ppm.icdThc))
43
- canvas.draw_threshold_line(ppm.get_line(ppm.icdTlc))
35
+ canvas.draw_threshold_line( scale(icd2of4(word_count), relative_to: icd4of4(word_count)) )
36
+ canvas.draw_threshold_line( scale(icdThc(word_count), relative_to: icd4of4(word_count)) )
37
+ canvas.draw_threshold_line( scale(icdTlc(word_count), relative_to: icd4of4(word_count)) )
44
38
  end
45
39
 
46
40
  logo_matrix.each do |position|
@@ -0,0 +1,99 @@
1
+ require 'bioinform'
2
+ require 'fileutils'
3
+
4
+ ##########
5
+ module Bioinform
6
+ module MotifModel
7
+ class DiPM # Doesn't work with alphabet
8
+
9
+ def self.from_file(filename)
10
+ parser = Bioinform::MatrixParser.new(fix_nucleotides_number: 16)
11
+ infos = parser.parse(File.read(filename))
12
+ name = infos[:name] || File.basename(filename, File.extname(filename))
13
+ pcm = self.new(infos[:matrix]).named(name)
14
+ end
15
+
16
+ attr_reader :matrix
17
+ def initialize(matrix)
18
+ @matrix = matrix
19
+ raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
20
+ end
21
+
22
+ def validation_errors
23
+ errors = []
24
+ errors << "matrix should be an Array" unless matrix.is_a? Array
25
+ errors << "matrix shouldn't be empty" unless matrix.size > 0
26
+ errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
27
+ errors << "each matrix position should be of size compatible with alphabet (=#{16})" unless matrix.all?{|pos| pos.size == 16 }
28
+ errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
29
+ errors
30
+ end
31
+ private :validation_errors
32
+
33
+ def valid?
34
+ validation_errors.empty?
35
+ rescue
36
+ false
37
+ end
38
+
39
+ private :valid?
40
+
41
+ def to_s
42
+ MotifFormatter.new.format(self)
43
+ end
44
+
45
+ def named(name)
46
+ NamedModel.new(self, name)
47
+ end
48
+
49
+ def length
50
+ matrix.size + 1
51
+ end
52
+
53
+ def ==(other)
54
+ self.class == other.class && matrix == other.matrix # alphabet should be considered (when alphabet implemented)
55
+ end
56
+
57
+ def each_position
58
+ if block_given?
59
+ matrix.each{|pos| yield pos}
60
+ else
61
+ self.to_enum(:each_position)
62
+ end
63
+ end
64
+
65
+ end
66
+
67
+ class DiPCM < DiPM
68
+
69
+ def sum_dependent_on_first_letter(pos)
70
+ mono_pos = Array.new(4, 0.0)
71
+ pos.each.with_index{|count, diletter|
72
+ first_letter = diletter / 4
73
+ mono_pos[first_letter] += count
74
+ }
75
+ mono_pos
76
+ end
77
+
78
+ def sum_dependent_on_second_letter(pos)
79
+ mono_pos = Array.new(4, 0.0)
80
+ pos.each.with_index{|count, diletter|
81
+ second_letter = diletter % 4
82
+ mono_pos[second_letter] += count
83
+ }
84
+ mono_pos
85
+ end
86
+
87
+ def to_mono
88
+ mono_matrix = each_position.map{|pos|
89
+ sum_dependent_on_first_letter(pos)
90
+ } + [ sum_dependent_on_second_letter(matrix.last) ]
91
+
92
+ PCM.new(mono_matrix)
93
+ end
94
+ end
95
+
96
+ class DiPWM < DiPM
97
+ end
98
+ end
99
+ end
@@ -9,8 +9,7 @@ def load_alignment_infos(alignment_lines)
9
9
  shift = shift.to_i
10
10
  orientation = orientation.downcase.to_sym
11
11
 
12
- ppm = get_ppm_from_file(filename)
13
- checkerr("bad input file: #{filename}") { ppm == nil }
12
+ ppm = Bioinform::MotifModel::PCM.from_file(filename)
14
13
  ppm.name ||= motif_name
15
14
 
16
15
  raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
@@ -25,7 +24,6 @@ def make_logo_alignment(aligned_motifs, options)
25
24
  aligned_motifs.map {|motif_infos|
26
25
  ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
27
26
  icd_mode: options[:icd_mode],
28
- words_count: options[:words_count],
29
27
  enable_threshold_lines: options[:threshold_lines])
30
28
  alignment += SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
31
29
  }
@@ -79,7 +77,7 @@ begin
79
77
  argv = ARGV
80
78
  total_orientation = :direct
81
79
  default_options = { x_unit: 30, y_unit: 60, logo_shift: 300, scheme: 'nucl_simpa',
82
- words_count: nil, icd_mode: :discrete, threshold_lines: false,
80
+ icd_mode: :discrete, threshold_lines: false,
83
81
  text_size: 24, background_color: 'white' }
84
82
  cli = SequenceLogo::CLI.new(default_options)
85
83
  cli.instance_eval do
@@ -45,8 +45,9 @@ begin
45
45
 
46
46
  argv = ARGV
47
47
  default_options = { x_unit: 30, y_unit: 60, scheme: 'nucl_simpa',
48
- words_count: nil, orientation: :direct, icd_mode: :discrete, threshold_lines: true,
49
- logo_folder: '.', background_color: 'white' }
48
+ orientation: :direct, icd_mode: :discrete, threshold_lines: true,
49
+ logo_folder: '.', background_color: 'white',
50
+ from_dinucleotide: false }
50
51
  cli = SequenceLogo::CLI.new(default_options)
51
52
  cli.instance_eval do
52
53
  parser.banner = doc
@@ -72,6 +73,8 @@ begin
72
73
  options[:background_fill] = Magick::SolidFill.new(v)
73
74
  end
74
75
  end
76
+
77
+ parser.on('--dinucleotide'){ options[:from_dinucleotide] = true }
75
78
  end
76
79
  options = cli.parse_options!(argv)
77
80
 
@@ -107,12 +110,14 @@ begin
107
110
  raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
108
111
 
109
112
  filenames.each do |filename|
110
- ppm = get_ppm_from_file(filename)
111
- checkerr("bad input file: #{filename}") { ppm == nil }
113
+ if options[:from_dinucleotide]
114
+ ppm = Bioinform::MotifModel::DiPCM.from_file(filename).to_mono
115
+ else
116
+ ppm = Bioinform::MotifModel::PCM.from_file(filename)
117
+ end
112
118
 
113
119
  logo = SequenceLogo::PPMLogo.new( ppm,
114
120
  icd_mode: options[:icd_mode],
115
- words_count: options[:words_count],
116
121
  enable_threshold_lines: options[:threshold_lines])
117
122
  objects_to_render << {renderable: logo, name: File.basename_wo_extname(filename)}
118
123
  end
@@ -1,4 +1,4 @@
1
- require 'RMagick'
1
+ require 'rmagick'
2
2
 
3
3
  module Magick
4
4
  class ImageList
@@ -2,4 +2,5 @@ require_relative 'canvases'
2
2
  require_relative 'canvas_factory'
3
3
  require_relative 'alignment'
4
4
  require_relative 'data_models'
5
- require_relative 'ytilib'
5
+ require_relative 'ppm_support'
6
+ require_relative 'di_pm'
@@ -0,0 +1,81 @@
1
+ require 'bioinform'
2
+
3
+ class Float
4
+ def log_fact
5
+ Math.lgamma(self + 1).first
6
+ end
7
+ end
8
+
9
+ class Integer
10
+ def log_fact
11
+ self.to_f.log_fact
12
+ end
13
+ end
14
+
15
+ def position_infocod(pos)
16
+ words_count = pos.inject(0.0, &:+)
17
+ ( pos.map(&:log_fact).inject(0.0, &:+) - words_count.log_fact ) / words_count
18
+ end
19
+
20
+ def icd4of4(words_count, floor: false)
21
+ i4o4 = words_count / 4.0
22
+ i4o4 = i4o4.floor if floor
23
+ position_infocod([i4o4, i4o4, i4o4, i4o4])
24
+ end
25
+
26
+ def icd2of4(words_count, floor: false)
27
+ i2o4 = words_count / 2.0
28
+ i2o4 = i2o4.floor if floor
29
+ position_infocod([i2o4, i2o4, 0, 0]) # 0 is equal to words_count % 2, because 0! = 1!
30
+ end
31
+
32
+ def icd3of4(words_count, floor: false)
33
+ i3o4 = words_count / 3.0
34
+ i3o4 = i3o4.floor if floor
35
+ addon = floor ? words_count % 3 : 0
36
+ position_infocod([i3o4, i3o4, i3o4, addon])
37
+ end
38
+
39
+ def icdThc(words_count, floor: false)
40
+ icd3of4(words_count, floor: floor)
41
+ end
42
+
43
+ def icdTlc(words_count, floor: false)
44
+ io = words_count / 6.0
45
+ io = io.floor if floor
46
+ position_infocod([2*io, 2*io, io, io])
47
+ end
48
+
49
+ def scale(value, relative_to:)
50
+ ( (value - relative_to) / relative_to ).abs
51
+ end
52
+
53
+ class Bioinform::MotifModel::PCM
54
+ def get_logo(icd_mode)
55
+ case icd_mode.to_s
56
+ when 'weblogo'
57
+ get_logo_weblogo
58
+ when 'discrete'
59
+ get_logo_discrete
60
+ end
61
+ end
62
+
63
+ def get_logo_weblogo
64
+ each_position.map{|position|
65
+ word_count = position.inject(0.0, &:+)
66
+ inf_content = position.map{|el|
67
+ (el == 0) ? 0 : (el / word_count) * Math.log2(el / word_count)
68
+ }.inject(0.0, :+) + 2
69
+ position.map{|el| (el / word_count) * inf_content / 2 }
70
+ }
71
+ end
72
+
73
+ def get_logo_discrete
74
+ each_position.map{|position|
75
+ word_count = position.inject(0.0, &:+)
76
+ icd4of4 = icd4of4(word_count)
77
+ inf_content = (icd4of4 == 0) ? 1.0 : scale(position_infocod(position), relative_to: icd4of4)
78
+ position.map{|el| (el / word_count) * inf_content }
79
+ }
80
+ end
81
+ end
@@ -1,3 +1,3 @@
1
1
  module SequenceLogo
2
- VERSION = "1.1.2"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -1,70 +0,0 @@
1
- class Object
2
- def deep_dup
3
- Marshal.load(Marshal.dump(self))
4
- end
5
- end
6
-
7
- def get_ppm_from_file(in_file_name)
8
- case File.ext_wo_name(in_file_name)
9
- when 'pat', 'pcm'
10
- pm = PM.load(in_file_name)
11
- pm.fixwc if pm.words_count
12
- when 'mfa', 'fasta', 'plain'
13
- pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
14
- when 'xml'
15
- pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
16
- when in_file_name
17
- pm = PPM.from_IUPAC(in_file_name.upcase)
18
- end
19
- pm.get_ppm
20
- rescue
21
- nil
22
- end
23
-
24
- class PPM
25
- attr_accessor :name
26
-
27
- def get_ppm
28
- self
29
- end
30
-
31
- def get_line(v)
32
- ( (v - icd4of4) / icd4of4 ).abs
33
- end
34
-
35
- def get_logo(icd_mode)
36
- case icd_mode.to_s
37
- when 'weblogo'
38
- get_logo_weblogo
39
- when 'discrete'
40
- get_logo_discrete
41
- end
42
- end
43
-
44
-
45
- def get_logo_weblogo
46
- rseq = each_position.map {|position|
47
- position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
48
- }
49
-
50
- each_position.with_index.map {|position, ind|
51
- position.map{|el| el * rseq[ind] / 2 }
52
- }
53
- end
54
-
55
- def get_logo_discrete
56
- checkerr("words count is undefined") { !words_count }
57
-
58
- rseq = each_position_index.map {|i|
59
- (icd4of4 == 0) ? 1.0 : get_line(infocod(i))
60
- }
61
-
62
- each_position.with_index.map {|position, ind|
63
- position.map{|el| el * rseq[ind] }
64
- }
65
- end
66
-
67
- def revcomp
68
- deep_dup.revcomp!
69
- end
70
- end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequence_logo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-05 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rmagick
@@ -69,23 +69,15 @@ files:
69
69
  - lib/sequence_logo/data_models/predefined_logo.rb
70
70
  - lib/sequence_logo/data_models/sequence.rb
71
71
  - lib/sequence_logo/data_models/sequence_with_snp.rb
72
+ - lib/sequence_logo/di_pm.rb
72
73
  - lib/sequence_logo/exec/glue_logos.rb
73
74
  - lib/sequence_logo/exec/sequence_logo.rb
74
75
  - lib/sequence_logo/magick_support.rb
75
76
  - lib/sequence_logo/pmflogo_lib.rb
77
+ - lib/sequence_logo/ppm_support.rb
76
78
  - lib/sequence_logo/support.rb
77
79
  - lib/sequence_logo/version.rb
78
- - lib/sequence_logo/ytilib.rb
79
- - lib/sequence_logo/ytilib/addon.rb
80
- - lib/sequence_logo/ytilib/bismark.rb
81
- - lib/sequence_logo/ytilib/hack1.rb
82
- - lib/sequence_logo/ytilib/infocod.rb
83
- - lib/sequence_logo/ytilib/iupac.rb
84
- - lib/sequence_logo/ytilib/pm.rb
85
- - lib/sequence_logo/ytilib/pmsd.rb
86
80
  - lib/sequence_logo/ytilib/ppm_support.rb
87
- - lib/sequence_logo/ytilib/randoom.rb
88
- - lib/sequence_logo/ytilib/ytilib.rb
89
81
  - sequence_logo.gemspec
90
82
  - test/data/logo/AHR_si_direct.png
91
83
  - test/data/logo/AHR_si_revcomp.png
@@ -112,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
104
  version: '0'
113
105
  requirements: []
114
106
  rubyforge_project:
115
- rubygems_version: 2.2.2
107
+ rubygems_version: 2.4.6
116
108
  signing_key:
117
109
  specification_version: 4
118
110
  summary: Tool for drawing sequence logos of motifs