macroape 3.3.1 → 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/macroape/exec/eval_alignment.rb +3 -16
- data/lib/macroape/exec/preprocess_collection.rb +14 -9
- data/lib/macroape/exec/scan_collection.rb +0 -2
- data/lib/macroape/pwm_compare.rb +9 -24
- data/lib/macroape/pwm_compare_aligned.rb +79 -5
- data/lib/macroape/threshold_by_pvalue.rb +4 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/data/test_collection/KLF4_f2.pat +0 -1
- data/test/preprocess_collection_test.rb +3 -1
- metadata +4 -4
@@ -114,25 +114,12 @@ begin
|
|
114
114
|
|
115
115
|
pwm_second.reverse_complement! if reverse
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
overlap = [pwm_first.length + [-shift,0].max, pwm_second.length + [shift,0].max].min - shift.abs
|
120
|
-
alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
|
121
|
-
(first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
|
122
|
-
(second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
|
123
|
-
|
124
|
-
cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
|
125
|
-
pwm_second.left_augment([shift,0].max))
|
126
|
-
|
117
|
+
cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
|
118
|
+
|
127
119
|
first_threshold = pwm_first.threshold(pvalue)
|
128
120
|
second_threshold = pwm_second.threshold(pvalue)
|
129
121
|
|
130
|
-
info = cmp.jaccard(first_threshold, second_threshold)
|
131
|
-
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
132
|
-
shift: shift,
|
133
|
-
orientation: orientation,
|
134
|
-
overlap: overlap,
|
135
|
-
alignment_length: alignment_length)
|
122
|
+
info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
|
136
123
|
|
137
124
|
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
138
125
|
|
@@ -74,18 +74,23 @@ begin
|
|
74
74
|
Dir.glob(File.join(folder,'*')) do |filename|
|
75
75
|
STDERR.puts filename unless silent
|
76
76
|
pwm = Bioinform::PWM.new(File.read(filename))
|
77
|
+
pwm.name ||= File.basename(filename, File.extname(filename))
|
78
|
+
|
79
|
+
# When support of onefile collections is introduced - then here should be check if name exists.
|
80
|
+
# Otherwise it should skip motif and tell you about this
|
81
|
+
# Also two command line options to fail on skipping or to skip silently should be included
|
82
|
+
|
77
83
|
info = {rough: {}, precise: {}}
|
78
|
-
|
79
|
-
output.each do |line|
|
80
|
-
pvalue, threshold, real_pvalue = line.split.map(&:to_f)
|
81
|
-
info[:rough][pvalue] = threshold
|
82
|
-
end
|
84
|
+
pwm.background(background)
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
pvalue, threshold, real_pvalue = line.split.map(&:to_f)
|
87
|
-
info[:precise][pvalue] = threshold
|
86
|
+
pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
87
|
+
info[:rough][pvalue] = threshold / rough_discretization
|
88
88
|
end
|
89
|
+
|
90
|
+
pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
91
|
+
info[:precise][pvalue] = threshold / precise_discretization
|
92
|
+
end
|
93
|
+
|
89
94
|
collection.add_pwm(pwm, info)
|
90
95
|
end
|
91
96
|
File.open(output_file,'w') do |f|
|
@@ -94,7 +94,6 @@ begin
|
|
94
94
|
|
95
95
|
similarities = {}
|
96
96
|
precision_file_mode = {}
|
97
|
-
unnamed_index = 0
|
98
97
|
|
99
98
|
collection.pwms.each_key do |name|
|
100
99
|
pwm = collection.pwms[name]
|
@@ -102,7 +101,6 @@ begin
|
|
102
101
|
STDERR.puts pwm.name unless silent
|
103
102
|
cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
|
104
103
|
info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
|
105
|
-
name = pwm.name || "Unnamed #{unnamed_index += 1}"
|
106
104
|
precision_file_mode[name] = :rough
|
107
105
|
|
108
106
|
if precision_mode == :precise and info[:similarity] >= minimal_similarity
|
data/lib/macroape/pwm_compare.rb
CHANGED
@@ -7,35 +7,20 @@ module Macroape
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def jaccard(threshold_first, threshold_second)
|
10
|
-
self.
|
11
|
-
|
12
|
-
end.max_by {|
|
10
|
+
self.map_each_alignment do |alignment|
|
11
|
+
alignment.alignment_infos.merge( alignment.jaccard(threshold_first, threshold_second) )
|
12
|
+
end.max_by {|alignment_infos| alignment_infos[:similarity] }
|
13
13
|
end
|
14
|
-
|
15
|
-
|
16
|
-
def each
|
14
|
+
|
15
|
+
def each_alignment
|
17
16
|
second_rc = second.reverse_complement
|
18
17
|
(-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
|
19
|
-
|
20
|
-
second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * second.length
|
21
|
-
overlap = [first.length + [-shift,0].max, second.length + [shift,0].max].min - shift.abs
|
22
|
-
alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
|
23
|
-
(first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
|
24
|
-
(second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
|
25
|
-
|
26
|
-
yield(PWMCompareAligned.new(first.left_augment([-shift,0].max),
|
27
|
-
(orientation == :direct ? second : second_rc).left_augment([shift,0].max)),
|
28
|
-
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
29
|
-
shift: shift,
|
30
|
-
orientation: orientation,
|
31
|
-
overlap: overlap,
|
32
|
-
alignment_length: alignment_length
|
33
|
-
)
|
18
|
+
yield PWMCompareAligned.new(first, (orientation == :direct ? second : second_rc), shift, orientation)
|
34
19
|
end
|
35
20
|
end
|
36
|
-
include Enumerable
|
37
|
-
alias :each_align :each
|
38
|
-
alias :map_each_align :map
|
39
21
|
|
22
|
+
include Enumerable
|
23
|
+
alias_method :each, :each_alignment
|
24
|
+
alias_method :map_each_alignment, :map
|
40
25
|
end
|
41
26
|
end
|
@@ -2,16 +2,89 @@ require 'macroape/aligned_pair_intersection'
|
|
2
2
|
|
3
3
|
module Macroape
|
4
4
|
class PWMCompareAligned
|
5
|
-
attr_reader :first, :second, :length
|
6
|
-
def initialize(first, second)
|
5
|
+
attr_reader :first, :second, :length, :shift, :orientation, :unaligned_first, :unaligned_second
|
6
|
+
def initialize(first, second, shift, orientation)
|
7
|
+
@unaligned_first, @unaligned_second = first, second
|
8
|
+
@shift, @orientation = shift, orientation
|
9
|
+
if shift > 0
|
10
|
+
first, second = first, second.left_augment(shift)
|
11
|
+
else
|
12
|
+
first, second = first.left_augment(-shift), second
|
13
|
+
end
|
7
14
|
@length = [first.length, second.length].max
|
8
15
|
@first = first.right_augment(@length - first.length)
|
9
16
|
@second = second.right_augment(@length - second.length)
|
10
17
|
end
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
def direct?
|
20
|
+
orientation == :direct
|
21
|
+
end
|
22
|
+
def revcomp?
|
23
|
+
orientation == :revcomp
|
24
|
+
end
|
25
|
+
|
26
|
+
def overlap
|
27
|
+
length.times.count{|pos| first_overlaps?(pos) && second_overlaps?(pos) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def first_pwm_alignment
|
31
|
+
length.times.map do |pos|
|
32
|
+
if first_overlaps?(pos)
|
33
|
+
'>'
|
34
|
+
else
|
35
|
+
'.'
|
36
|
+
end
|
37
|
+
end.join
|
38
|
+
end
|
39
|
+
|
40
|
+
def second_pwm_alignment
|
41
|
+
length.times.map do |pos|
|
42
|
+
if second_overlaps?(pos)
|
43
|
+
direct? ? '>' : '<'
|
44
|
+
else
|
45
|
+
'.'
|
46
|
+
end
|
47
|
+
end.join
|
48
|
+
end
|
49
|
+
|
50
|
+
def alignment_infos
|
51
|
+
{shift: shift,
|
52
|
+
orientation: orientation,
|
53
|
+
text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
|
54
|
+
overlap: overlap,
|
55
|
+
alignment_length: length}
|
56
|
+
end
|
57
|
+
|
58
|
+
def first_length
|
59
|
+
unaligned_first.length
|
60
|
+
end
|
61
|
+
def second_length
|
62
|
+
unaligned_second.length
|
63
|
+
end
|
64
|
+
|
65
|
+
# whether first matrix overlap specified position
|
66
|
+
def first_overlaps?(pos)
|
67
|
+
return false unless pos >= 0 && pos < length
|
68
|
+
if shift > 0
|
69
|
+
pos < first_length
|
70
|
+
else
|
71
|
+
pos >= -shift && pos < -shift + first_length
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def second_overlaps?(pos)
|
76
|
+
return false unless pos >= 0 && pos < length
|
77
|
+
if shift > 0
|
78
|
+
pos >= shift && pos < shift + second_length
|
79
|
+
else
|
80
|
+
pos < second_length
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
=begin
|
85
|
+
def discrete(rate)
|
86
|
+
PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
|
87
|
+
end
|
15
88
|
|
16
89
|
def sort_pair_of_matrices_by(&block)
|
17
90
|
mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
|
@@ -26,6 +99,7 @@ module Macroape
|
|
26
99
|
def permute_columns(permutation_index)
|
27
100
|
PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
|
28
101
|
end
|
102
|
+
=end
|
29
103
|
|
30
104
|
def jaccard(first_threshold, second_threshold)
|
31
105
|
f = first.counts_by_thresholds(first_threshold).first
|
data/lib/macroape/version.rb
CHANGED
data/macroape.gemspec
CHANGED
@@ -5,7 +5,9 @@ require 'macroape'
|
|
5
5
|
class TestPreprocessCollection < Test::Unit::TestCase
|
6
6
|
def test_multipvalue_preproceessing
|
7
7
|
system(Helpers.exec_cmd('preprocess_collection','test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent'))
|
8
|
-
|
8
|
+
# Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.93 p194
|
9
|
+
# it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
|
10
|
+
assert_equal YAML.load(File.read('test/data/test_collection.yaml')), YAML.load(File.read('test/data/test_collection.yaml.tmp'))
|
9
11
|
File.delete 'test/data/test_collection.yaml.tmp'
|
10
12
|
end
|
11
13
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1
|
4
|
+
version: 3.3.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bioinform
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.1.
|
21
|
+
version: 0.1.2
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.1.
|
29
|
+
version: 0.1.2
|
30
30
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
31
31
|
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
32
32
|
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|