macroape 3.3.1 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/macroape/exec/eval_alignment.rb +3 -16
- data/lib/macroape/exec/preprocess_collection.rb +14 -9
- data/lib/macroape/exec/scan_collection.rb +0 -2
- data/lib/macroape/pwm_compare.rb +9 -24
- data/lib/macroape/pwm_compare_aligned.rb +79 -5
- data/lib/macroape/threshold_by_pvalue.rb +4 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/data/test_collection/KLF4_f2.pat +0 -1
- data/test/preprocess_collection_test.rb +3 -1
- metadata +4 -4
@@ -114,25 +114,12 @@ begin
|
|
114
114
|
|
115
115
|
pwm_second.reverse_complement! if reverse
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
overlap = [pwm_first.length + [-shift,0].max, pwm_second.length + [shift,0].max].min - shift.abs
|
120
|
-
alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
|
121
|
-
(first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
|
122
|
-
(second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
|
123
|
-
|
124
|
-
cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
|
125
|
-
pwm_second.left_augment([shift,0].max))
|
126
|
-
|
117
|
+
cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
|
118
|
+
|
127
119
|
first_threshold = pwm_first.threshold(pvalue)
|
128
120
|
second_threshold = pwm_second.threshold(pvalue)
|
129
121
|
|
130
|
-
info = cmp.jaccard(first_threshold, second_threshold)
|
131
|
-
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
132
|
-
shift: shift,
|
133
|
-
orientation: orientation,
|
134
|
-
overlap: overlap,
|
135
|
-
alignment_length: alignment_length)
|
122
|
+
info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
|
136
123
|
|
137
124
|
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
138
125
|
|
@@ -74,18 +74,23 @@ begin
|
|
74
74
|
Dir.glob(File.join(folder,'*')) do |filename|
|
75
75
|
STDERR.puts filename unless silent
|
76
76
|
pwm = Bioinform::PWM.new(File.read(filename))
|
77
|
+
pwm.name ||= File.basename(filename, File.extname(filename))
|
78
|
+
|
79
|
+
# When support for one-file collections is introduced, there should be a check here that the name exists.
|
80
|
+
# Otherwise it should skip the motif and report that it was skipped.
|
81
|
+
# Also, two command-line options should be added: one to fail on skipping and one to skip silently.
|
82
|
+
|
77
83
|
info = {rough: {}, precise: {}}
|
78
|
-
|
79
|
-
output.each do |line|
|
80
|
-
pvalue, threshold, real_pvalue = line.split.map(&:to_f)
|
81
|
-
info[:rough][pvalue] = threshold
|
82
|
-
end
|
84
|
+
pwm.background(background)
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
pvalue, threshold, real_pvalue = line.split.map(&:to_f)
|
87
|
-
info[:precise][pvalue] = threshold
|
86
|
+
pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
87
|
+
info[:rough][pvalue] = threshold / rough_discretization
|
88
88
|
end
|
89
|
+
|
90
|
+
pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
91
|
+
info[:precise][pvalue] = threshold / precise_discretization
|
92
|
+
end
|
93
|
+
|
89
94
|
collection.add_pwm(pwm, info)
|
90
95
|
end
|
91
96
|
File.open(output_file,'w') do |f|
|
@@ -94,7 +94,6 @@ begin
|
|
94
94
|
|
95
95
|
similarities = {}
|
96
96
|
precision_file_mode = {}
|
97
|
-
unnamed_index = 0
|
98
97
|
|
99
98
|
collection.pwms.each_key do |name|
|
100
99
|
pwm = collection.pwms[name]
|
@@ -102,7 +101,6 @@ begin
|
|
102
101
|
STDERR.puts pwm.name unless silent
|
103
102
|
cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
|
104
103
|
info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
|
105
|
-
name = pwm.name || "Unnamed #{unnamed_index += 1}"
|
106
104
|
precision_file_mode[name] = :rough
|
107
105
|
|
108
106
|
if precision_mode == :precise and info[:similarity] >= minimal_similarity
|
data/lib/macroape/pwm_compare.rb
CHANGED
@@ -7,35 +7,20 @@ module Macroape
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def jaccard(threshold_first, threshold_second)
|
10
|
-
self.
|
11
|
-
|
12
|
-
end.max_by {|
|
10
|
+
self.map_each_alignment do |alignment|
|
11
|
+
alignment.alignment_infos.merge( alignment.jaccard(threshold_first, threshold_second) )
|
12
|
+
end.max_by {|alignment_infos| alignment_infos[:similarity] }
|
13
13
|
end
|
14
|
-
|
15
|
-
|
16
|
-
def each
|
14
|
+
|
15
|
+
def each_alignment
|
17
16
|
second_rc = second.reverse_complement
|
18
17
|
(-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
|
19
|
-
|
20
|
-
second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * second.length
|
21
|
-
overlap = [first.length + [-shift,0].max, second.length + [shift,0].max].min - shift.abs
|
22
|
-
alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
|
23
|
-
(first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
|
24
|
-
(second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
|
25
|
-
|
26
|
-
yield(PWMCompareAligned.new(first.left_augment([-shift,0].max),
|
27
|
-
(orientation == :direct ? second : second_rc).left_augment([shift,0].max)),
|
28
|
-
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
29
|
-
shift: shift,
|
30
|
-
orientation: orientation,
|
31
|
-
overlap: overlap,
|
32
|
-
alignment_length: alignment_length
|
33
|
-
)
|
18
|
+
yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
|
34
19
|
end
|
35
20
|
end
|
36
|
-
include Enumerable
|
37
|
-
alias :each_align :each
|
38
|
-
alias :map_each_align :map
|
39
21
|
|
22
|
+
include Enumerable
|
23
|
+
alias_method :each, :each_alignment
|
24
|
+
alias_method :map_each_alignment, :map
|
40
25
|
end
|
41
26
|
end
|
@@ -2,16 +2,89 @@ require 'macroape/aligned_pair_intersection'
|
|
2
2
|
|
3
3
|
module Macroape
|
4
4
|
class PWMCompareAligned
|
5
|
-
attr_reader :first, :second, :length
|
6
|
-
def initialize(first, second)
|
5
|
+
attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
|
6
|
+
def initialize(first, second, shift, orientation)
|
7
|
+
@unaligned_first, @unaligned_second = first, second
|
8
|
+
@shift, @orientation = shift, orientation
|
9
|
+
if shift > 0
|
10
|
+
first, second = first, second.left_augment(shift)
|
11
|
+
else
|
12
|
+
first, second = first.left_augment(-shift), second
|
13
|
+
end
|
7
14
|
@length = [first.length, second.length].max
|
8
15
|
@first = first.right_augment(@length - first.length)
|
9
16
|
@second = second.right_augment(@length - second.length)
|
10
17
|
end
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
def direct?
|
20
|
+
orientation == :direct
|
21
|
+
end
|
22
|
+
def revcomp?
|
23
|
+
orientation == :revcomp
|
24
|
+
end
|
25
|
+
|
26
|
+
def overlap
|
27
|
+
length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def first_pwm_alignment
|
31
|
+
length.times.map do |pos|
|
32
|
+
if first_overlaps?(pos)
|
33
|
+
'>'
|
34
|
+
else
|
35
|
+
'.'
|
36
|
+
end
|
37
|
+
end.join
|
38
|
+
end
|
39
|
+
|
40
|
+
def second_pwm_alignment
|
41
|
+
length.times.map do |pos|
|
42
|
+
if second_overlaps?(pos)
|
43
|
+
direct? ? '>' : '<'
|
44
|
+
else
|
45
|
+
'.'
|
46
|
+
end
|
47
|
+
end.join
|
48
|
+
end
|
49
|
+
|
50
|
+
def alignment_infos
|
51
|
+
{shift: shift,
|
52
|
+
orientation: orientation,
|
53
|
+
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
54
|
+
overlap: overlap,
|
55
|
+
alignment_length: length}
|
56
|
+
end
|
57
|
+
|
58
|
+
def first_length
|
59
|
+
unaligned_first.length
|
60
|
+
end
|
61
|
+
def second_length
|
62
|
+
unaligned_second.length
|
63
|
+
end
|
64
|
+
|
65
|
+
# Whether the first matrix overlaps the specified position
|
66
|
+
def first_overlaps?(pos)
|
67
|
+
return false unless pos >= 0 && pos < length
|
68
|
+
if shift > 0
|
69
|
+
pos < first_length
|
70
|
+
else
|
71
|
+
pos >= -shift && pos < -shift + first_length
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def second_overlaps?(pos)
|
76
|
+
return false unless pos >= 0 && pos < length
|
77
|
+
if shift > 0
|
78
|
+
pos >= shift && pos < shift + second_length
|
79
|
+
else
|
80
|
+
pos < second_length
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
=begin
|
85
|
+
def discrete(rate)
|
86
|
+
PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
|
87
|
+
end
|
15
88
|
|
16
89
|
def sort_pair_of_matrices_by(&block)
|
17
90
|
mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
|
@@ -26,6 +99,7 @@ module Macroape
|
|
26
99
|
def permute_columns(permutation_index)
|
27
100
|
PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
|
28
101
|
end
|
102
|
+
=end
|
29
103
|
|
30
104
|
def jaccard(first_threshold, second_threshold)
|
31
105
|
f = first.counts_by_thresholds(first_threshold).first
|
data/lib/macroape/version.rb
CHANGED
data/macroape.gemspec
CHANGED
@@ -5,7 +5,9 @@ require 'macroape'
|
|
5
5
|
class TestPreprocessCollection < Test::Unit::TestCase
|
6
6
|
def test_multipvalue_preproceessing
|
7
7
|
system(Helpers.exec_cmd('preprocess_collection','test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent'))
|
8
|
-
|
8
|
+
# Use YAML.load(File.read()) rather than YAML.load_file() because in Ruby before 1.9.3-p194
|
9
|
+
# it doesn't immediately release the file descriptor (if I understood the error correctly), so File.delete fails
|
10
|
+
assert_equal YAML.load(File.read('test/data/test_collection.yaml')), YAML.load(File.read('test/data/test_collection.yaml.tmp'))
|
9
11
|
File.delete 'test/data/test_collection.yaml.tmp'
|
10
12
|
end
|
11
13
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1
|
4
|
+
version: 3.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bioinform
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.1.
|
21
|
+
version: 0.1.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.1.
|
29
|
+
version: 0.1.2
|
30
30
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
31
31
|
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
32
32
|
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|