sequence_logo 1.1.2 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/TODO.txt +2 -0
- data/lib/sequence_logo/canvases/logo_canvas.rb +1 -1
- data/lib/sequence_logo/cli.rb +0 -3
- data/lib/sequence_logo/data_models/ppm_logo.rb +6 -12
- data/lib/sequence_logo/di_pm.rb +99 -0
- data/lib/sequence_logo/exec/glue_logos.rb +2 -4
- data/lib/sequence_logo/exec/sequence_logo.rb +10 -5
- data/lib/sequence_logo/magick_support.rb +1 -1
- data/lib/sequence_logo/pmflogo_lib.rb +2 -1
- data/lib/sequence_logo/ppm_support.rb +81 -0
- data/lib/sequence_logo/version.rb +1 -1
- data/lib/sequence_logo/ytilib/ppm_support.rb +0 -70
- metadata +5 -13
- data/lib/sequence_logo/ytilib.rb +0 -10
- data/lib/sequence_logo/ytilib/addon.rb +0 -247
- data/lib/sequence_logo/ytilib/bismark.rb +0 -71
- data/lib/sequence_logo/ytilib/hack1.rb +0 -75
- data/lib/sequence_logo/ytilib/infocod.rb +0 -108
- data/lib/sequence_logo/ytilib/iupac.rb +0 -92
- data/lib/sequence_logo/ytilib/pm.rb +0 -574
- data/lib/sequence_logo/ytilib/pmsd.rb +0 -99
- data/lib/sequence_logo/ytilib/randoom.rb +0 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +0 -147
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f1c64cbd02b8379d849f555d8aebacffc885886
|
4
|
+
data.tar.gz: b2d2189f7f72a60faa86978d997ab8a240604e45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 900ab873940c8646483a5b56b5596a2cdc606fda91941a33134f82b8e163a50a8dc00f6454a0a8608e78a236deb6b7393f6367e9298c55162befc2e1d2270851
|
7
|
+
data.tar.gz: 7503db8c78201db87ae1069deb9585e86721c4a7d8e0910debfa2c48fc46443411bca8036a72761462acc3cf94f1644092068a20c756185234bc983927569ffc
|
data/TODO.txt
CHANGED
data/lib/sequence_logo/cli.rb
CHANGED
@@ -21,9 +21,6 @@ module SequenceLogo
|
|
21
21
|
opts.on('-y', '--y-unit Y_UNIT', 'Base letter height') do |v|
|
22
22
|
options[:y_unit] = v.to_i
|
23
23
|
end
|
24
|
-
opts.on('--words-count WEIGHT', 'Define alignment weight') do |v|
|
25
|
-
options[:words_count] = v.to_f
|
26
|
-
end
|
27
24
|
opts.on('--icd-mode MODE', 'Calculation mode: discrete or weblogo', 'Weblogo is assumed if word count not given') do |v|
|
28
25
|
options[:icd_mode] = v.to_sym
|
29
26
|
raise ArgumentError, 'icd-mode can be either discrete or weblogo' unless [:discrete, :weblogo].include?(options[:icd_mode])
|
@@ -3,19 +3,12 @@ require_relative '../canvases'
|
|
3
3
|
module SequenceLogo
|
4
4
|
# wrapper around PPM to make it possible to configure rendering in a flexible way
|
5
5
|
class PPMLogo
|
6
|
-
attr_reader :ppm, :words_count, :icd_mode, :enable_threshold_lines
|
6
|
+
attr_reader :ppm, :icd_mode, :enable_threshold_lines
|
7
7
|
|
8
8
|
def initialize(ppm, options = {})
|
9
9
|
@ppm = ppm
|
10
|
-
@words_count = options[:words_count]
|
11
10
|
@icd_mode = options[:icd_mode]
|
12
11
|
@enable_threshold_lines = options[:enable_threshold_lines]
|
13
|
-
|
14
|
-
@ppm.words_count = @words_count if @words_count
|
15
|
-
unless ppm.words_count
|
16
|
-
report "words count for PPM is undefined, assuming weblogo mode"
|
17
|
-
@icd_mode = :weblogo
|
18
|
-
end
|
19
12
|
end
|
20
13
|
|
21
14
|
def length
|
@@ -27,7 +20,7 @@ module SequenceLogo
|
|
27
20
|
end
|
28
21
|
|
29
22
|
def revcomp
|
30
|
-
PPMLogo.new(ppm.revcomp, words_count: words_count, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
|
23
|
+
PPMLogo.new(ppm.revcomp, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
|
31
24
|
end
|
32
25
|
|
33
26
|
def logo_matrix
|
@@ -37,10 +30,11 @@ module SequenceLogo
|
|
37
30
|
def render(canvas_factory)
|
38
31
|
canvas = LogoCanvas.new(canvas_factory)
|
39
32
|
canvas.background(canvas_factory.background_fill)
|
33
|
+
word_count = ppm.each_position.map{|pos| pos.inject(0.0, &:+) }.max
|
40
34
|
if icd_mode == :discrete && enable_threshold_lines
|
41
|
-
canvas.draw_threshold_line(
|
42
|
-
canvas.draw_threshold_line(
|
43
|
-
canvas.draw_threshold_line(
|
35
|
+
canvas.draw_threshold_line( scale(icd2of4(word_count), relative_to: icd4of4(word_count)) )
|
36
|
+
canvas.draw_threshold_line( scale(icdThc(word_count), relative_to: icd4of4(word_count)) )
|
37
|
+
canvas.draw_threshold_line( scale(icdTlc(word_count), relative_to: icd4of4(word_count)) )
|
44
38
|
end
|
45
39
|
|
46
40
|
logo_matrix.each do |position|
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'bioinform'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
##########
|
5
|
+
module Bioinform
|
6
|
+
module MotifModel
|
7
|
+
class DiPM # Doesn't work with alphabet
|
8
|
+
|
9
|
+
def self.from_file(filename)
|
10
|
+
parser = Bioinform::MatrixParser.new(fix_nucleotides_number: 16)
|
11
|
+
infos = parser.parse(File.read(filename))
|
12
|
+
name = infos[:name] || File.basename(filename, File.extname(filename))
|
13
|
+
pcm = self.new(infos[:matrix]).named(name)
|
14
|
+
end
|
15
|
+
|
16
|
+
attr_reader :matrix
|
17
|
+
def initialize(matrix)
|
18
|
+
@matrix = matrix
|
19
|
+
raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
|
20
|
+
end
|
21
|
+
|
22
|
+
def validation_errors
|
23
|
+
errors = []
|
24
|
+
errors << "matrix should be an Array" unless matrix.is_a? Array
|
25
|
+
errors << "matrix shouldn't be empty" unless matrix.size > 0
|
26
|
+
errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
|
27
|
+
errors << "each matrix position should be of size compatible with alphabet (=#{16})" unless matrix.all?{|pos| pos.size == 16 }
|
28
|
+
errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
29
|
+
errors
|
30
|
+
end
|
31
|
+
private :validation_errors
|
32
|
+
|
33
|
+
def valid?
|
34
|
+
validation_errors.empty?
|
35
|
+
rescue
|
36
|
+
false
|
37
|
+
end
|
38
|
+
|
39
|
+
private :valid?
|
40
|
+
|
41
|
+
def to_s
|
42
|
+
MotifFormatter.new.format(self)
|
43
|
+
end
|
44
|
+
|
45
|
+
def named(name)
|
46
|
+
NamedModel.new(self, name)
|
47
|
+
end
|
48
|
+
|
49
|
+
def length
|
50
|
+
matrix.size + 1
|
51
|
+
end
|
52
|
+
|
53
|
+
def ==(other)
|
54
|
+
self.class == other.class && matrix == other.matrix # alphabet should be considered (when alphabet implemented)
|
55
|
+
end
|
56
|
+
|
57
|
+
def each_position
|
58
|
+
if block_given?
|
59
|
+
matrix.each{|pos| yield pos}
|
60
|
+
else
|
61
|
+
self.to_enum(:each_position)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
class DiPCM < DiPM
|
68
|
+
|
69
|
+
def sum_dependent_on_first_letter(pos)
|
70
|
+
mono_pos = Array.new(4, 0.0)
|
71
|
+
pos.each.with_index{|count, diletter|
|
72
|
+
first_letter = diletter / 4
|
73
|
+
mono_pos[first_letter] += count
|
74
|
+
}
|
75
|
+
mono_pos
|
76
|
+
end
|
77
|
+
|
78
|
+
def sum_dependent_on_second_letter(pos)
|
79
|
+
mono_pos = Array.new(4, 0.0)
|
80
|
+
pos.each.with_index{|count, diletter|
|
81
|
+
second_letter = diletter % 4
|
82
|
+
mono_pos[second_letter] += count
|
83
|
+
}
|
84
|
+
mono_pos
|
85
|
+
end
|
86
|
+
|
87
|
+
def to_mono
|
88
|
+
mono_matrix = each_position.map{|pos|
|
89
|
+
sum_dependent_on_first_letter(pos)
|
90
|
+
} + [ sum_dependent_on_second_letter(matrix.last) ]
|
91
|
+
|
92
|
+
PCM.new(mono_matrix)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class DiPWM < DiPM
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -9,8 +9,7 @@ def load_alignment_infos(alignment_lines)
|
|
9
9
|
shift = shift.to_i
|
10
10
|
orientation = orientation.downcase.to_sym
|
11
11
|
|
12
|
-
ppm = get_ppm_from_file(filename)
|
13
|
-
checkerr("bad input file: #{filename}") { ppm == nil }
|
12
|
+
ppm = Bioinform::MotifModel::PCM.from_file(filename)
|
14
13
|
ppm.name ||= motif_name
|
15
14
|
|
16
15
|
raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
|
@@ -25,7 +24,6 @@ def make_logo_alignment(aligned_motifs, options)
|
|
25
24
|
aligned_motifs.map {|motif_infos|
|
26
25
|
ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
|
27
26
|
icd_mode: options[:icd_mode],
|
28
|
-
words_count: options[:words_count],
|
29
27
|
enable_threshold_lines: options[:threshold_lines])
|
30
28
|
alignment += SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
|
31
29
|
}
|
@@ -79,7 +77,7 @@ begin
|
|
79
77
|
argv = ARGV
|
80
78
|
total_orientation = :direct
|
81
79
|
default_options = { x_unit: 30, y_unit: 60, logo_shift: 300, scheme: 'nucl_simpa',
|
82
|
-
|
80
|
+
icd_mode: :discrete, threshold_lines: false,
|
83
81
|
text_size: 24, background_color: 'white' }
|
84
82
|
cli = SequenceLogo::CLI.new(default_options)
|
85
83
|
cli.instance_eval do
|
@@ -45,8 +45,9 @@ begin
|
|
45
45
|
|
46
46
|
argv = ARGV
|
47
47
|
default_options = { x_unit: 30, y_unit: 60, scheme: 'nucl_simpa',
|
48
|
-
|
49
|
-
logo_folder: '.', background_color: 'white'
|
48
|
+
orientation: :direct, icd_mode: :discrete, threshold_lines: true,
|
49
|
+
logo_folder: '.', background_color: 'white',
|
50
|
+
from_dinucleotide: false }
|
50
51
|
cli = SequenceLogo::CLI.new(default_options)
|
51
52
|
cli.instance_eval do
|
52
53
|
parser.banner = doc
|
@@ -72,6 +73,8 @@ begin
|
|
72
73
|
options[:background_fill] = Magick::SolidFill.new(v)
|
73
74
|
end
|
74
75
|
end
|
76
|
+
|
77
|
+
parser.on('--dinucleotide'){ options[:from_dinucleotide] = true }
|
75
78
|
end
|
76
79
|
options = cli.parse_options!(argv)
|
77
80
|
|
@@ -107,12 +110,14 @@ begin
|
|
107
110
|
raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
|
108
111
|
|
109
112
|
filenames.each do |filename|
|
110
|
-
|
111
|
-
|
113
|
+
if options[:from_dinucleotide]
|
114
|
+
ppm = Bioinform::MotifModel::DiPCM.from_file(filename).to_mono
|
115
|
+
else
|
116
|
+
ppm = Bioinform::MotifModel::PCM.from_file(filename)
|
117
|
+
end
|
112
118
|
|
113
119
|
logo = SequenceLogo::PPMLogo.new( ppm,
|
114
120
|
icd_mode: options[:icd_mode],
|
115
|
-
words_count: options[:words_count],
|
116
121
|
enable_threshold_lines: options[:threshold_lines])
|
117
122
|
objects_to_render << {renderable: logo, name: File.basename_wo_extname(filename)}
|
118
123
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'bioinform'
|
2
|
+
|
3
|
+
class Float
|
4
|
+
def log_fact
|
5
|
+
Math.lgamma(self + 1).first
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class Integer
|
10
|
+
def log_fact
|
11
|
+
self.to_f.log_fact
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def position_infocod(pos)
|
16
|
+
words_count = pos.inject(0.0, &:+)
|
17
|
+
( pos.map(&:log_fact).inject(0.0, &:+) - words_count.log_fact ) / words_count
|
18
|
+
end
|
19
|
+
|
20
|
+
def icd4of4(words_count, floor: false)
|
21
|
+
i4o4 = words_count / 4.0
|
22
|
+
i4o4 = i4o4.floor if floor
|
23
|
+
position_infocod([i4o4, i4o4, i4o4, i4o4])
|
24
|
+
end
|
25
|
+
|
26
|
+
def icd2of4(words_count, floor: false)
|
27
|
+
i2o4 = words_count / 2.0
|
28
|
+
i2o4 = i2o4.floor if floor
|
29
|
+
position_infocod([i2o4, i2o4, 0, 0]) # 0 is equal to words_count % 2, because 0! = 1!
|
30
|
+
end
|
31
|
+
|
32
|
+
def icd3of4(words_count, floor: false)
|
33
|
+
i3o4 = words_count / 3.0
|
34
|
+
i3o4 = i3o4.floor if floor
|
35
|
+
addon = floor ? words_count % 3 : 0
|
36
|
+
position_infocod([i3o4, i3o4, i3o4, addon])
|
37
|
+
end
|
38
|
+
|
39
|
+
def icdThc(words_count, floor: false)
|
40
|
+
icd3of4(words_count, floor: floor)
|
41
|
+
end
|
42
|
+
|
43
|
+
def icdTlc(words_count, floor: false)
|
44
|
+
io = words_count / 6.0
|
45
|
+
io = io.floor if floor
|
46
|
+
position_infocod([2*io, 2*io, io, io])
|
47
|
+
end
|
48
|
+
|
49
|
+
def scale(value, relative_to:)
|
50
|
+
( (value - relative_to) / relative_to ).abs
|
51
|
+
end
|
52
|
+
|
53
|
+
class Bioinform::MotifModel::PCM
|
54
|
+
def get_logo(icd_mode)
|
55
|
+
case icd_mode.to_s
|
56
|
+
when 'weblogo'
|
57
|
+
get_logo_weblogo
|
58
|
+
when 'discrete'
|
59
|
+
get_logo_discrete
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_logo_weblogo
|
64
|
+
each_position.map{|position|
|
65
|
+
word_count = position.inject(0.0, &:+)
|
66
|
+
inf_content = position.map{|el|
|
67
|
+
(el == 0) ? 0 : (el / word_count) * Math.log2(el / word_count)
|
68
|
+
}.inject(0.0, :+) + 2
|
69
|
+
position.map{|el| (el / word_count) * inf_content / 2 }
|
70
|
+
}
|
71
|
+
end
|
72
|
+
|
73
|
+
def get_logo_discrete
|
74
|
+
each_position.map{|position|
|
75
|
+
word_count = position.inject(0.0, &:+)
|
76
|
+
icd4of4 = icd4of4(word_count)
|
77
|
+
inf_content = (icd4of4 == 0) ? 1.0 : scale(position_infocod(position), relative_to: icd4of4)
|
78
|
+
position.map{|el| (el / word_count) * inf_content }
|
79
|
+
}
|
80
|
+
end
|
81
|
+
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
class Object
|
2
|
-
def deep_dup
|
3
|
-
Marshal.load(Marshal.dump(self))
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
def get_ppm_from_file(in_file_name)
|
8
|
-
case File.ext_wo_name(in_file_name)
|
9
|
-
when 'pat', 'pcm'
|
10
|
-
pm = PM.load(in_file_name)
|
11
|
-
pm.fixwc if pm.words_count
|
12
|
-
when 'mfa', 'fasta', 'plain'
|
13
|
-
pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
|
14
|
-
when 'xml'
|
15
|
-
pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
|
16
|
-
when in_file_name
|
17
|
-
pm = PPM.from_IUPAC(in_file_name.upcase)
|
18
|
-
end
|
19
|
-
pm.get_ppm
|
20
|
-
rescue
|
21
|
-
nil
|
22
|
-
end
|
23
|
-
|
24
|
-
class PPM
|
25
|
-
attr_accessor :name
|
26
|
-
|
27
|
-
def get_ppm
|
28
|
-
self
|
29
|
-
end
|
30
|
-
|
31
|
-
def get_line(v)
|
32
|
-
( (v - icd4of4) / icd4of4 ).abs
|
33
|
-
end
|
34
|
-
|
35
|
-
def get_logo(icd_mode)
|
36
|
-
case icd_mode.to_s
|
37
|
-
when 'weblogo'
|
38
|
-
get_logo_weblogo
|
39
|
-
when 'discrete'
|
40
|
-
get_logo_discrete
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def get_logo_weblogo
|
46
|
-
rseq = each_position.map {|position|
|
47
|
-
position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
|
48
|
-
}
|
49
|
-
|
50
|
-
each_position.with_index.map {|position, ind|
|
51
|
-
position.map{|el| el * rseq[ind] / 2 }
|
52
|
-
}
|
53
|
-
end
|
54
|
-
|
55
|
-
def get_logo_discrete
|
56
|
-
checkerr("words count is undefined") { !words_count }
|
57
|
-
|
58
|
-
rseq = each_position_index.map {|i|
|
59
|
-
(icd4of4 == 0) ? 1.0 : get_line(infocod(i))
|
60
|
-
}
|
61
|
-
|
62
|
-
each_position.with_index.map {|position, ind|
|
63
|
-
position.map{|el| el * rseq[ind] }
|
64
|
-
}
|
65
|
-
end
|
66
|
-
|
67
|
-
def revcomp
|
68
|
-
deep_dup.revcomp!
|
69
|
-
end
|
70
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequence_logo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rmagick
|
@@ -69,23 +69,15 @@ files:
|
|
69
69
|
- lib/sequence_logo/data_models/predefined_logo.rb
|
70
70
|
- lib/sequence_logo/data_models/sequence.rb
|
71
71
|
- lib/sequence_logo/data_models/sequence_with_snp.rb
|
72
|
+
- lib/sequence_logo/di_pm.rb
|
72
73
|
- lib/sequence_logo/exec/glue_logos.rb
|
73
74
|
- lib/sequence_logo/exec/sequence_logo.rb
|
74
75
|
- lib/sequence_logo/magick_support.rb
|
75
76
|
- lib/sequence_logo/pmflogo_lib.rb
|
77
|
+
- lib/sequence_logo/ppm_support.rb
|
76
78
|
- lib/sequence_logo/support.rb
|
77
79
|
- lib/sequence_logo/version.rb
|
78
|
-
- lib/sequence_logo/ytilib.rb
|
79
|
-
- lib/sequence_logo/ytilib/addon.rb
|
80
|
-
- lib/sequence_logo/ytilib/bismark.rb
|
81
|
-
- lib/sequence_logo/ytilib/hack1.rb
|
82
|
-
- lib/sequence_logo/ytilib/infocod.rb
|
83
|
-
- lib/sequence_logo/ytilib/iupac.rb
|
84
|
-
- lib/sequence_logo/ytilib/pm.rb
|
85
|
-
- lib/sequence_logo/ytilib/pmsd.rb
|
86
80
|
- lib/sequence_logo/ytilib/ppm_support.rb
|
87
|
-
- lib/sequence_logo/ytilib/randoom.rb
|
88
|
-
- lib/sequence_logo/ytilib/ytilib.rb
|
89
81
|
- sequence_logo.gemspec
|
90
82
|
- test/data/logo/AHR_si_direct.png
|
91
83
|
- test/data/logo/AHR_si_revcomp.png
|
@@ -112,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
104
|
version: '0'
|
113
105
|
requirements: []
|
114
106
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.
|
107
|
+
rubygems_version: 2.4.6
|
116
108
|
signing_key:
|
117
109
|
specification_version: 4
|
118
110
|
summary: Tool for drawing sequence logos of motifs
|