sequence_logo 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TODO.txt +2 -0
- data/lib/sequence_logo/canvases/logo_canvas.rb +1 -1
- data/lib/sequence_logo/cli.rb +0 -3
- data/lib/sequence_logo/data_models/ppm_logo.rb +6 -12
- data/lib/sequence_logo/di_pm.rb +99 -0
- data/lib/sequence_logo/exec/glue_logos.rb +2 -4
- data/lib/sequence_logo/exec/sequence_logo.rb +10 -5
- data/lib/sequence_logo/magick_support.rb +1 -1
- data/lib/sequence_logo/pmflogo_lib.rb +2 -1
- data/lib/sequence_logo/ppm_support.rb +81 -0
- data/lib/sequence_logo/version.rb +1 -1
- data/lib/sequence_logo/ytilib/ppm_support.rb +0 -70
- metadata +5 -13
- data/lib/sequence_logo/ytilib.rb +0 -10
- data/lib/sequence_logo/ytilib/addon.rb +0 -247
- data/lib/sequence_logo/ytilib/bismark.rb +0 -71
- data/lib/sequence_logo/ytilib/hack1.rb +0 -75
- data/lib/sequence_logo/ytilib/infocod.rb +0 -108
- data/lib/sequence_logo/ytilib/iupac.rb +0 -92
- data/lib/sequence_logo/ytilib/pm.rb +0 -574
- data/lib/sequence_logo/ytilib/pmsd.rb +0 -99
- data/lib/sequence_logo/ytilib/randoom.rb +0 -131
- data/lib/sequence_logo/ytilib/ytilib.rb +0 -147
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f1c64cbd02b8379d849f555d8aebacffc885886
|
4
|
+
data.tar.gz: b2d2189f7f72a60faa86978d997ab8a240604e45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 900ab873940c8646483a5b56b5596a2cdc606fda91941a33134f82b8e163a50a8dc00f6454a0a8608e78a236deb6b7393f6367e9298c55162befc2e1d2270851
|
7
|
+
data.tar.gz: 7503db8c78201db87ae1069deb9585e86721c4a7d8e0910debfa2c48fc46443411bca8036a72761462acc3cf94f1644092068a20c756185234bc983927569ffc
|
data/TODO.txt
CHANGED
data/lib/sequence_logo/cli.rb
CHANGED
@@ -21,9 +21,6 @@ module SequenceLogo
|
|
21
21
|
opts.on('-y', '--y-unit Y_UNIT', 'Base letter height') do |v|
|
22
22
|
options[:y_unit] = v.to_i
|
23
23
|
end
|
24
|
-
opts.on('--words-count WEIGHT', 'Define alignment weight') do |v|
|
25
|
-
options[:words_count] = v.to_f
|
26
|
-
end
|
27
24
|
opts.on('--icd-mode MODE', 'Calculation mode: discrete or weblogo', 'Weblogo is assumed if word count not given') do |v|
|
28
25
|
options[:icd_mode] = v.to_sym
|
29
26
|
raise ArgumentError, 'icd-mode can be either discrete or weblogo' unless [:discrete, :weblogo].include?(options[:icd_mode])
|
@@ -3,19 +3,12 @@ require_relative '../canvases'
|
|
3
3
|
module SequenceLogo
|
4
4
|
# wrapper around PPM to make it possible to configure rendering in a flexible way
|
5
5
|
class PPMLogo
|
6
|
-
attr_reader :ppm, :
|
6
|
+
attr_reader :ppm, :icd_mode, :enable_threshold_lines
|
7
7
|
|
8
8
|
def initialize(ppm, options = {})
|
9
9
|
@ppm = ppm
|
10
|
-
@words_count = options[:words_count]
|
11
10
|
@icd_mode = options[:icd_mode]
|
12
11
|
@enable_threshold_lines = options[:enable_threshold_lines]
|
13
|
-
|
14
|
-
@ppm.words_count = @words_count if @words_count
|
15
|
-
unless ppm.words_count
|
16
|
-
report "words count for PPM is undefined, assuming weblogo mode"
|
17
|
-
@icd_mode = :weblogo
|
18
|
-
end
|
19
12
|
end
|
20
13
|
|
21
14
|
def length
|
@@ -27,7 +20,7 @@ module SequenceLogo
|
|
27
20
|
end
|
28
21
|
|
29
22
|
def revcomp
|
30
|
-
PPMLogo.new(ppm.revcomp,
|
23
|
+
PPMLogo.new(ppm.revcomp, icd_mode: icd_mode, enable_threshold_lines: enable_threshold_lines)
|
31
24
|
end
|
32
25
|
|
33
26
|
def logo_matrix
|
@@ -37,10 +30,11 @@ module SequenceLogo
|
|
37
30
|
def render(canvas_factory)
|
38
31
|
canvas = LogoCanvas.new(canvas_factory)
|
39
32
|
canvas.background(canvas_factory.background_fill)
|
33
|
+
word_count = ppm.each_position.map{|pos| pos.inject(0.0, &:+) }.max
|
40
34
|
if icd_mode == :discrete && enable_threshold_lines
|
41
|
-
canvas.draw_threshold_line(
|
42
|
-
canvas.draw_threshold_line(
|
43
|
-
canvas.draw_threshold_line(
|
35
|
+
canvas.draw_threshold_line( scale(icd2of4(word_count), relative_to: icd4of4(word_count)) )
|
36
|
+
canvas.draw_threshold_line( scale(icdThc(word_count), relative_to: icd4of4(word_count)) )
|
37
|
+
canvas.draw_threshold_line( scale(icdTlc(word_count), relative_to: icd4of4(word_count)) )
|
44
38
|
end
|
45
39
|
|
46
40
|
logo_matrix.each do |position|
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'bioinform'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
##########
|
5
|
+
module Bioinform
|
6
|
+
module MotifModel
|
7
|
+
class DiPM # Doesn't work with alphabet
|
8
|
+
|
9
|
+
def self.from_file(filename)
|
10
|
+
parser = Bioinform::MatrixParser.new(fix_nucleotides_number: 16)
|
11
|
+
infos = parser.parse(File.read(filename))
|
12
|
+
name = infos[:name] || File.basename(filename, File.extname(filename))
|
13
|
+
pcm = self.new(infos[:matrix]).named(name)
|
14
|
+
end
|
15
|
+
|
16
|
+
attr_reader :matrix
|
17
|
+
def initialize(matrix)
|
18
|
+
@matrix = matrix
|
19
|
+
raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
|
20
|
+
end
|
21
|
+
|
22
|
+
def validation_errors
|
23
|
+
errors = []
|
24
|
+
errors << "matrix should be an Array" unless matrix.is_a? Array
|
25
|
+
errors << "matrix shouldn't be empty" unless matrix.size > 0
|
26
|
+
errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
|
27
|
+
errors << "each matrix position should be of size compatible with alphabet (=#{16})" unless matrix.all?{|pos| pos.size == 16 }
|
28
|
+
errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
29
|
+
errors
|
30
|
+
end
|
31
|
+
private :validation_errors
|
32
|
+
|
33
|
+
def valid?
|
34
|
+
validation_errors.empty?
|
35
|
+
rescue
|
36
|
+
false
|
37
|
+
end
|
38
|
+
|
39
|
+
private :valid?
|
40
|
+
|
41
|
+
def to_s
|
42
|
+
MotifFormatter.new.format(self)
|
43
|
+
end
|
44
|
+
|
45
|
+
def named(name)
|
46
|
+
NamedModel.new(self, name)
|
47
|
+
end
|
48
|
+
|
49
|
+
def length
|
50
|
+
matrix.size + 1
|
51
|
+
end
|
52
|
+
|
53
|
+
def ==(other)
|
54
|
+
self.class == other.class && matrix == other.matrix # alphabet should be considered (when alphabet implemented)
|
55
|
+
end
|
56
|
+
|
57
|
+
def each_position
|
58
|
+
if block_given?
|
59
|
+
matrix.each{|pos| yield pos}
|
60
|
+
else
|
61
|
+
self.to_enum(:each_position)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
class DiPCM < DiPM
|
68
|
+
|
69
|
+
def sum_dependent_on_first_letter(pos)
|
70
|
+
mono_pos = Array.new(4, 0.0)
|
71
|
+
pos.each.with_index{|count, diletter|
|
72
|
+
first_letter = diletter / 4
|
73
|
+
mono_pos[first_letter] += count
|
74
|
+
}
|
75
|
+
mono_pos
|
76
|
+
end
|
77
|
+
|
78
|
+
def sum_dependent_on_second_letter(pos)
|
79
|
+
mono_pos = Array.new(4, 0.0)
|
80
|
+
pos.each.with_index{|count, diletter|
|
81
|
+
second_letter = diletter % 4
|
82
|
+
mono_pos[second_letter] += count
|
83
|
+
}
|
84
|
+
mono_pos
|
85
|
+
end
|
86
|
+
|
87
|
+
def to_mono
|
88
|
+
mono_matrix = each_position.map{|pos|
|
89
|
+
sum_dependent_on_first_letter(pos)
|
90
|
+
} + [ sum_dependent_on_second_letter(matrix.last) ]
|
91
|
+
|
92
|
+
PCM.new(mono_matrix)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class DiPWM < DiPM
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -9,8 +9,7 @@ def load_alignment_infos(alignment_lines)
|
|
9
9
|
shift = shift.to_i
|
10
10
|
orientation = orientation.downcase.to_sym
|
11
11
|
|
12
|
-
ppm =
|
13
|
-
checkerr("bad input file: #{filename}") { ppm == nil }
|
12
|
+
ppm = Bioinform::MotifModel::PCM.from_file(filename)
|
14
13
|
ppm.name ||= motif_name
|
15
14
|
|
16
15
|
raise 'Unknown orientation' unless [:direct, :revcomp].include?(orientation)
|
@@ -25,7 +24,6 @@ def make_logo_alignment(aligned_motifs, options)
|
|
25
24
|
aligned_motifs.map {|motif_infos|
|
26
25
|
ppm_logo = SequenceLogo::PPMLogo.new(motif_infos[:motif],
|
27
26
|
icd_mode: options[:icd_mode],
|
28
|
-
words_count: options[:words_count],
|
29
27
|
enable_threshold_lines: options[:threshold_lines])
|
30
28
|
alignment += SequenceLogo::Alignment::Item.new(ppm_logo, motif_infos[:shift])
|
31
29
|
}
|
@@ -79,7 +77,7 @@ begin
|
|
79
77
|
argv = ARGV
|
80
78
|
total_orientation = :direct
|
81
79
|
default_options = { x_unit: 30, y_unit: 60, logo_shift: 300, scheme: 'nucl_simpa',
|
82
|
-
|
80
|
+
icd_mode: :discrete, threshold_lines: false,
|
83
81
|
text_size: 24, background_color: 'white' }
|
84
82
|
cli = SequenceLogo::CLI.new(default_options)
|
85
83
|
cli.instance_eval do
|
@@ -45,8 +45,9 @@ begin
|
|
45
45
|
|
46
46
|
argv = ARGV
|
47
47
|
default_options = { x_unit: 30, y_unit: 60, scheme: 'nucl_simpa',
|
48
|
-
|
49
|
-
logo_folder: '.', background_color: 'white'
|
48
|
+
orientation: :direct, icd_mode: :discrete, threshold_lines: true,
|
49
|
+
logo_folder: '.', background_color: 'white',
|
50
|
+
from_dinucleotide: false }
|
50
51
|
cli = SequenceLogo::CLI.new(default_options)
|
51
52
|
cli.instance_eval do
|
52
53
|
parser.banner = doc
|
@@ -72,6 +73,8 @@ begin
|
|
72
73
|
options[:background_fill] = Magick::SolidFill.new(v)
|
73
74
|
end
|
74
75
|
end
|
76
|
+
|
77
|
+
parser.on('--dinucleotide'){ options[:from_dinucleotide] = true }
|
75
78
|
end
|
76
79
|
options = cli.parse_options!(argv)
|
77
80
|
|
@@ -107,12 +110,14 @@ begin
|
|
107
110
|
raise ArgumentError, 'Specify at least one motif file' if filenames.empty?
|
108
111
|
|
109
112
|
filenames.each do |filename|
|
110
|
-
|
111
|
-
|
113
|
+
if options[:from_dinucleotide]
|
114
|
+
ppm = Bioinform::MotifModel::DiPCM.from_file(filename).to_mono
|
115
|
+
else
|
116
|
+
ppm = Bioinform::MotifModel::PCM.from_file(filename)
|
117
|
+
end
|
112
118
|
|
113
119
|
logo = SequenceLogo::PPMLogo.new( ppm,
|
114
120
|
icd_mode: options[:icd_mode],
|
115
|
-
words_count: options[:words_count],
|
116
121
|
enable_threshold_lines: options[:threshold_lines])
|
117
122
|
objects_to_render << {renderable: logo, name: File.basename_wo_extname(filename)}
|
118
123
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'bioinform'
|
2
|
+
|
3
|
+
class Float
|
4
|
+
def log_fact
|
5
|
+
Math.lgamma(self + 1).first
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class Integer
|
10
|
+
def log_fact
|
11
|
+
self.to_f.log_fact
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def position_infocod(pos)
|
16
|
+
words_count = pos.inject(0.0, &:+)
|
17
|
+
( pos.map(&:log_fact).inject(0.0, &:+) - words_count.log_fact ) / words_count
|
18
|
+
end
|
19
|
+
|
20
|
+
def icd4of4(words_count, floor: false)
|
21
|
+
i4o4 = words_count / 4.0
|
22
|
+
i4o4 = i4o4.floor if floor
|
23
|
+
position_infocod([i4o4, i4o4, i4o4, i4o4])
|
24
|
+
end
|
25
|
+
|
26
|
+
def icd2of4(words_count, floor: false)
|
27
|
+
i2o4 = words_count / 2.0
|
28
|
+
i2o4 = i2o4.floor if floor
|
29
|
+
position_infocod([i2o4, i2o4, 0, 0]) # 0 is equal to words_count % 2, because 0! = 1!
|
30
|
+
end
|
31
|
+
|
32
|
+
def icd3of4(words_count, floor: false)
|
33
|
+
i3o4 = words_count / 3.0
|
34
|
+
i3o4 = i3o4.floor if floor
|
35
|
+
addon = floor ? words_count % 3 : 0
|
36
|
+
position_infocod([i3o4, i3o4, i3o4, addon])
|
37
|
+
end
|
38
|
+
|
39
|
+
def icdThc(words_count, floor: false)
|
40
|
+
icd3of4(words_count, floor: floor)
|
41
|
+
end
|
42
|
+
|
43
|
+
def icdTlc(words_count, floor: false)
|
44
|
+
io = words_count / 6.0
|
45
|
+
io = io.floor if floor
|
46
|
+
position_infocod([2*io, 2*io, io, io])
|
47
|
+
end
|
48
|
+
|
49
|
+
def scale(value, relative_to:)
|
50
|
+
( (value - relative_to) / relative_to ).abs
|
51
|
+
end
|
52
|
+
|
53
|
+
class Bioinform::MotifModel::PCM
|
54
|
+
def get_logo(icd_mode)
|
55
|
+
case icd_mode.to_s
|
56
|
+
when 'weblogo'
|
57
|
+
get_logo_weblogo
|
58
|
+
when 'discrete'
|
59
|
+
get_logo_discrete
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_logo_weblogo
|
64
|
+
each_position.map{|position|
|
65
|
+
word_count = position.inject(0.0, &:+)
|
66
|
+
inf_content = position.map{|el|
|
67
|
+
(el == 0) ? 0 : (el / word_count) * Math.log2(el / word_count)
|
68
|
+
}.inject(0.0, :+) + 2
|
69
|
+
position.map{|el| (el / word_count) * inf_content / 2 }
|
70
|
+
}
|
71
|
+
end
|
72
|
+
|
73
|
+
def get_logo_discrete
|
74
|
+
each_position.map{|position|
|
75
|
+
word_count = position.inject(0.0, &:+)
|
76
|
+
icd4of4 = icd4of4(word_count)
|
77
|
+
inf_content = (icd4of4 == 0) ? 1.0 : scale(position_infocod(position), relative_to: icd4of4)
|
78
|
+
position.map{|el| (el / word_count) * inf_content }
|
79
|
+
}
|
80
|
+
end
|
81
|
+
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
class Object
|
2
|
-
def deep_dup
|
3
|
-
Marshal.load(Marshal.dump(self))
|
4
|
-
end
|
5
|
-
end
|
6
|
-
|
7
|
-
def get_ppm_from_file(in_file_name)
|
8
|
-
case File.ext_wo_name(in_file_name)
|
9
|
-
when 'pat', 'pcm'
|
10
|
-
pm = PM.load(in_file_name)
|
11
|
-
pm.fixwc if pm.words_count
|
12
|
-
when 'mfa', 'fasta', 'plain'
|
13
|
-
pm = PM.new_pcm(Ytilib.read_seqs2array(in_file_name))
|
14
|
-
when 'xml'
|
15
|
-
pm = PM.from_bismark(Bismark.new(in_file_name).elements["//PPM"])
|
16
|
-
when in_file_name
|
17
|
-
pm = PPM.from_IUPAC(in_file_name.upcase)
|
18
|
-
end
|
19
|
-
pm.get_ppm
|
20
|
-
rescue
|
21
|
-
nil
|
22
|
-
end
|
23
|
-
|
24
|
-
class PPM
|
25
|
-
attr_accessor :name
|
26
|
-
|
27
|
-
def get_ppm
|
28
|
-
self
|
29
|
-
end
|
30
|
-
|
31
|
-
def get_line(v)
|
32
|
-
( (v - icd4of4) / icd4of4 ).abs
|
33
|
-
end
|
34
|
-
|
35
|
-
def get_logo(icd_mode)
|
36
|
-
case icd_mode.to_s
|
37
|
-
when 'weblogo'
|
38
|
-
get_logo_weblogo
|
39
|
-
when 'discrete'
|
40
|
-
get_logo_discrete
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def get_logo_weblogo
|
46
|
-
rseq = each_position.map {|position|
|
47
|
-
position.map{|el| (el == 0) ? 0 : el * Math.log2(el) }.inject(0, :+) + 2
|
48
|
-
}
|
49
|
-
|
50
|
-
each_position.with_index.map {|position, ind|
|
51
|
-
position.map{|el| el * rseq[ind] / 2 }
|
52
|
-
}
|
53
|
-
end
|
54
|
-
|
55
|
-
def get_logo_discrete
|
56
|
-
checkerr("words count is undefined") { !words_count }
|
57
|
-
|
58
|
-
rseq = each_position_index.map {|i|
|
59
|
-
(icd4of4 == 0) ? 1.0 : get_line(infocod(i))
|
60
|
-
}
|
61
|
-
|
62
|
-
each_position.with_index.map {|position, ind|
|
63
|
-
position.map{|el| el * rseq[ind] }
|
64
|
-
}
|
65
|
-
end
|
66
|
-
|
67
|
-
def revcomp
|
68
|
-
deep_dup.revcomp!
|
69
|
-
end
|
70
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequence_logo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rmagick
|
@@ -69,23 +69,15 @@ files:
|
|
69
69
|
- lib/sequence_logo/data_models/predefined_logo.rb
|
70
70
|
- lib/sequence_logo/data_models/sequence.rb
|
71
71
|
- lib/sequence_logo/data_models/sequence_with_snp.rb
|
72
|
+
- lib/sequence_logo/di_pm.rb
|
72
73
|
- lib/sequence_logo/exec/glue_logos.rb
|
73
74
|
- lib/sequence_logo/exec/sequence_logo.rb
|
74
75
|
- lib/sequence_logo/magick_support.rb
|
75
76
|
- lib/sequence_logo/pmflogo_lib.rb
|
77
|
+
- lib/sequence_logo/ppm_support.rb
|
76
78
|
- lib/sequence_logo/support.rb
|
77
79
|
- lib/sequence_logo/version.rb
|
78
|
-
- lib/sequence_logo/ytilib.rb
|
79
|
-
- lib/sequence_logo/ytilib/addon.rb
|
80
|
-
- lib/sequence_logo/ytilib/bismark.rb
|
81
|
-
- lib/sequence_logo/ytilib/hack1.rb
|
82
|
-
- lib/sequence_logo/ytilib/infocod.rb
|
83
|
-
- lib/sequence_logo/ytilib/iupac.rb
|
84
|
-
- lib/sequence_logo/ytilib/pm.rb
|
85
|
-
- lib/sequence_logo/ytilib/pmsd.rb
|
86
80
|
- lib/sequence_logo/ytilib/ppm_support.rb
|
87
|
-
- lib/sequence_logo/ytilib/randoom.rb
|
88
|
-
- lib/sequence_logo/ytilib/ytilib.rb
|
89
81
|
- sequence_logo.gemspec
|
90
82
|
- test/data/logo/AHR_si_direct.png
|
91
83
|
- test/data/logo/AHR_si_revcomp.png
|
@@ -112,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
104
|
version: '0'
|
113
105
|
requirements: []
|
114
106
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.
|
107
|
+
rubygems_version: 2.4.6
|
116
108
|
signing_key:
|
117
109
|
specification_version: 4
|
118
110
|
summary: Tool for drawing sequence logos of motifs
|