macroape 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +28 -7
- data/lib/macroape.rb +14 -26
- data/lib/macroape/aligned_pair_intersection.rb +24 -24
- data/lib/macroape/collection.rb +1 -2
- data/lib/macroape/count_by_threshold.rb +8 -26
- data/lib/macroape/exec/eval_alignment.rb +19 -19
- data/lib/macroape/exec/eval_similarity.rb +18 -16
- data/lib/macroape/exec/find_pvalue.rb +8 -6
- data/lib/macroape/exec/find_threshold.rb +7 -5
- data/lib/macroape/exec/preprocess_collection.rb +10 -7
- data/lib/macroape/exec/scan_collection.rb +13 -10
- data/lib/macroape/pwm_compare.rb +33 -2
- data/lib/macroape/pwm_compare_aligned.rb +38 -2
- data/lib/macroape/threshold_by_pvalue.rb +48 -43
- data/lib/macroape/version.rb +3 -3
- data/macroape.gemspec +2 -0
- data/test/data/test_collection.yaml +70 -4
- data/test/eval_alignment_similarity_test.rb +19 -0
- data/test/eval_similarity_test.rb +26 -0
- data/test/find_pvalue_test.rb +25 -0
- data/test/find_threshold_test.rb +29 -0
- data/test/preprocess_collection_test.rb +9 -0
- data/test/scan_collection_test.rb +17 -0
- data/test/test_helper.rb +10 -0
- metadata +33 -16
- data/lib/macroape/aligned_pair_metrics.rb +0 -24
- data/lib/macroape/aligned_pair_transformations.rb +0 -23
- data/lib/macroape/extract_pwm.rb +0 -32
- data/lib/macroape/gauss_estimation.rb +0 -30
- data/lib/macroape/matrix_information.rb +0 -29
- data/lib/macroape/matrix_on_background.rb +0 -16
- data/lib/macroape/matrix_transformations.rb +0 -29
- data/lib/macroape/pair_metrics.rb +0 -9
- data/lib/macroape/pair_transformations.rb +0 -28
- data/lib/macroape/single_matrix.rb +0 -45
- data/lib/macroape/support.rb +0 -34
- data/test/macroape_test.rb +0 -125
data/Rakefile
CHANGED
@@ -1,7 +1,28 @@
|
|
1
|
-
#!/usr/bin/env rake
|
2
|
-
require "bundler/gem_tasks"
|
3
|
-
|
4
|
-
|
5
|
-
task :
|
6
|
-
|
7
|
-
end
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
namespace :spec do
|
5
|
+
task :find_threshold do
|
6
|
+
system("ruby -I ./test test/find_threshold_test.rb")
|
7
|
+
end
|
8
|
+
task :find_pvalue do
|
9
|
+
system("ruby -I ./test test/find_pvalue_test.rb")
|
10
|
+
end
|
11
|
+
task :eval_similarity do
|
12
|
+
system("ruby -I ./test test/eval_similarity_test.rb")
|
13
|
+
end
|
14
|
+
task :eval_alignment_similarity do
|
15
|
+
system("ruby -I ./test test/eval_alignment_similarity_test.rb")
|
16
|
+
end
|
17
|
+
task :preprocess_collection do
|
18
|
+
system("ruby -I ./test test/preprocess_collection_test.rb")
|
19
|
+
end
|
20
|
+
task :scan_collection do
|
21
|
+
system("ruby -I ./test test/scan_collection_test.rb")
|
22
|
+
end
|
23
|
+
task :all => [:find_threshold, :find_pvalue, :eval_similarity,
|
24
|
+
:eval_alignment_similarity, :scan_collection, :preprocess_collection]
|
25
|
+
end
|
26
|
+
|
27
|
+
desc 'Test all functionality of gem executables'
|
28
|
+
task :spec => ['spec:all']
|
data/lib/macroape.rb
CHANGED
@@ -1,26 +1,14 @@
|
|
1
|
-
require 'macroape/version'
|
2
|
-
|
3
|
-
|
4
|
-
require 'macroape/
|
5
|
-
require 'macroape/
|
6
|
-
|
7
|
-
require 'macroape/
|
8
|
-
require 'macroape/
|
9
|
-
require 'macroape/
|
10
|
-
require 'macroape/
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
require 'macroape/aligned_pair_metrics'
|
16
|
-
require 'macroape/aligned_pair_intersection'
|
17
|
-
require 'macroape/pwm_compare_aligned'
|
18
|
-
|
19
|
-
require 'macroape/pair_transformations'
|
20
|
-
require 'macroape/pair_metrics'
|
21
|
-
require 'macroape/pwm_compare'
|
22
|
-
require 'macroape/collection'
|
23
|
-
|
24
|
-
module Macroape
|
25
|
-
# Your code goes here...
|
26
|
-
end
|
1
|
+
require 'macroape/version'
|
2
|
+
|
3
|
+
require 'bioinform'
|
4
|
+
require 'macroape/threshold_by_pvalue'
|
5
|
+
require 'macroape/count_by_threshold'
|
6
|
+
|
7
|
+
require 'macroape/aligned_pair_intersection'
|
8
|
+
require 'macroape/pwm_compare_aligned'
|
9
|
+
require 'macroape/pwm_compare'
|
10
|
+
require 'macroape/collection'
|
11
|
+
|
12
|
+
module Macroape
|
13
|
+
# Your code goes here...
|
14
|
+
end
|
data/lib/macroape/aligned_pair_intersection.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
module
|
2
|
-
|
1
|
+
module Macroape
|
2
|
+
class PWMCompareAligned
|
3
3
|
|
4
4
|
def counts_for_two_matrices(threshold_first, threshold_second)
|
5
|
-
if first.
|
6
|
-
if first.
|
5
|
+
if first.background == second.background
|
6
|
+
if first.background == [1,1,1,1]
|
7
7
|
common_words_for_two_matrices(threshold_first, threshold_second)
|
8
8
|
else
|
9
9
|
counts_for_two_matrices_with_same_probabilities(threshold_first, threshold_second)
|
@@ -18,8 +18,8 @@ module PWMCompare
|
|
18
18
|
result_first = 0.0
|
19
19
|
result_second = 0.0
|
20
20
|
length.times do |column|
|
21
|
-
ending_weight_first = first.
|
22
|
-
ending_weight_second = second.
|
21
|
+
ending_weight_first = first.background_sum ** (length - column - 1)
|
22
|
+
ending_weight_second = second.background_sum ** (length - column - 1)
|
23
23
|
already_enough_first = threshold_first - first.worst_suffix[column + 1]
|
24
24
|
already_enough_second = threshold_second - second.worst_suffix[column + 1]
|
25
25
|
least_sufficient_first = threshold_first - first.best_suffix[column + 1]
|
@@ -33,23 +33,23 @@ module PWMCompare
|
|
33
33
|
if new_score_first >= already_enough_first
|
34
34
|
new_score_second = score_second + second.matrix[column][letter]
|
35
35
|
if new_score_second >= already_enough_second
|
36
|
-
result_first += count[0] * first.
|
37
|
-
result_second += count[1] * second.
|
36
|
+
result_first += count[0] * first.background[letter] * ending_weight_first
|
37
|
+
result_second += count[1] * second.background[letter] * ending_weight_second
|
38
38
|
elsif new_score_second >= least_sufficient_second
|
39
|
-
new_scores[new_score_first][new_score_second][0] += count[0] * first.
|
40
|
-
new_scores[new_score_first][new_score_second][1] += count[1] * second.
|
39
|
+
new_scores[new_score_first][new_score_second][0] += count[0] * first.background[letter]
|
40
|
+
new_scores[new_score_first][new_score_second][1] += count[1] * second.background[letter]
|
41
41
|
end
|
42
42
|
elsif new_score_first >= least_sufficient_first
|
43
43
|
new_score_second = score_second + second.matrix[column][letter]
|
44
44
|
if new_score_second >= least_sufficient_second
|
45
|
-
new_scores[new_score_first][new_score_second][0] += count[0] * first.
|
46
|
-
new_scores[new_score_first][new_score_second][1] += count[1] * second.
|
45
|
+
new_scores[new_score_first][new_score_second][0] += count[0] * first.background[letter]
|
46
|
+
new_scores[new_score_first][new_score_second][1] += count[1] * second.background[letter]
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|
51
51
|
end
|
52
|
-
raise 'Hash overflow in
|
52
|
+
raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
|
53
53
|
scores = new_scores
|
54
54
|
end
|
55
55
|
[result_first, result_second]
|
@@ -58,9 +58,9 @@ module PWMCompare
|
|
58
58
|
def counts_for_two_matrices_with_same_probabilities(threshold_first, threshold_second)
|
59
59
|
scores = { 0 => {0 => 1} } # scores_on_first_pwm, scores_on_second_pwm --> count_on_first_probabilities, count_on_second_probabilities
|
60
60
|
result = 0.0
|
61
|
-
|
61
|
+
background = first.background
|
62
62
|
length.times do |column|
|
63
|
-
ending_weight = first.
|
63
|
+
ending_weight = first.background_sum ** (length - column - 1)
|
64
64
|
already_enough_first = threshold_first - first.worst_suffix[column + 1]
|
65
65
|
already_enough_second = threshold_second - second.worst_suffix[column + 1]
|
66
66
|
least_sufficient_first = threshold_first - first.best_suffix[column + 1]
|
@@ -74,20 +74,20 @@ module PWMCompare
|
|
74
74
|
if new_score_first >= already_enough_first
|
75
75
|
new_score_second = score_second + second.matrix[column][letter]
|
76
76
|
if new_score_second >= already_enough_second
|
77
|
-
result += count *
|
77
|
+
result += count * background[letter] * ending_weight
|
78
78
|
elsif new_score_second >= least_sufficient_second
|
79
|
-
new_scores[new_score_first][new_score_second] += count *
|
79
|
+
new_scores[new_score_first][new_score_second] += count * background[letter]
|
80
80
|
end
|
81
81
|
elsif new_score_first >= least_sufficient_first
|
82
82
|
new_score_second = score_second + second.matrix[column][letter]
|
83
83
|
if new_score_second >= least_sufficient_second
|
84
|
-
new_scores[new_score_first][new_score_second] += count *
|
84
|
+
new_scores[new_score_first][new_score_second] += count * background[letter]
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
90
|
-
raise 'Hash overflow in
|
90
|
+
raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_same_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
|
91
91
|
scores = new_scores
|
92
92
|
end
|
93
93
|
[result, result]
|
@@ -97,7 +97,7 @@ module PWMCompare
|
|
97
97
|
def common_words_for_two_matrices(threshold_first, threshold_second)
|
98
98
|
scores = { 0 => {0 => 1} } # scores_on_first_pwm, scores_on_second_pwm --> count_on_first_probabilities, count_on_second_probabilities
|
99
99
|
result = 0
|
100
|
-
length.times do |column|
|
100
|
+
length.times do |column|
|
101
101
|
ending_weight = 4 ** (length - column - 1)
|
102
102
|
already_enough_first = threshold_first - first.worst_suffix[column + 1]
|
103
103
|
already_enough_second = threshold_second - second.worst_suffix[column + 1]
|
@@ -107,13 +107,13 @@ module PWMCompare
|
|
107
107
|
new_scores = Hash.new{|h,k| h[k]=Hash.new{|h2,k2| h2[k2]=0} }
|
108
108
|
scores.each do |score_first, second_scores|
|
109
109
|
second_scores.each do |score_second, count|
|
110
|
-
4.times do |letter|
|
110
|
+
4.times do |letter|
|
111
111
|
new_score_first = score_first + first.matrix[column][letter]
|
112
|
-
if new_score_first >= already_enough_first
|
112
|
+
if new_score_first >= already_enough_first
|
113
113
|
new_score_second = score_second + second.matrix[column][letter]
|
114
114
|
if new_score_second >= already_enough_second
|
115
115
|
result += count * ending_weight
|
116
|
-
elsif new_score_second >= least_sufficient_second
|
116
|
+
elsif new_score_second >= least_sufficient_second
|
117
117
|
new_scores[new_score_first][new_score_second] += count
|
118
118
|
end
|
119
119
|
elsif new_score_first >= least_sufficient_first
|
@@ -126,7 +126,7 @@ module PWMCompare
|
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
129
|
-
raise 'Hash overflow in
|
129
|
+
raise 'Hash overflow in Macroape::AlignedPairIntersection#common_words_for_two_matrices' if defined? MaxHashSizeDouble and new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
|
130
130
|
scores = new_scores
|
131
131
|
end
|
132
132
|
[result, result]
|
data/lib/macroape/collection.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module
|
1
|
+
module Macroape
|
2
2
|
class Collection
|
3
3
|
attr_reader :rough_discretization, :precise_discretization, :background, :pvalues, :pwms, :infos
|
4
4
|
def initialize(rough_discretization, precise_discretization, background, pvalues)
|
@@ -7,7 +7,6 @@ module PWM
|
|
7
7
|
@infos={}
|
8
8
|
end
|
9
9
|
def add_pwm(pwm,info)
|
10
|
-
#@pwms[pwm] = info
|
11
10
|
@pwms[pwm.name] = pwm
|
12
11
|
@infos[pwm.name] = info
|
13
12
|
end
|
data/lib/macroape/count_by_threshold.rb
CHANGED
@@ -1,34 +1,16 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'macroape/threshold_by_pvalue'
|
2
|
+
|
3
|
+
module Bioinform
|
4
|
+
class PWM
|
3
5
|
def counts_by_thresholds(*thresholds)
|
4
|
-
scores =
|
5
|
-
|
6
|
-
|
7
|
-
#scores.select{|score,count| score >= threshold}.map{|score,count| count}.inject(0){|sum,val|sum+val}
|
8
|
-
scores.inject(0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
|
6
|
+
scores = count_distribution_after_threshold(thresholds.min)
|
7
|
+
thresholds.map{ |threshold|
|
8
|
+
scores.inject(0.0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
|
9
9
|
}
|
10
|
-
=end
|
11
|
-
s_thr= thresholds.map.with_index{|threshold,index|[threshold,index]}.sort_by{|threshold,index| threshold}
|
12
|
-
|
13
|
-
cnt = 0
|
14
|
-
thr_cnts=[]
|
15
|
-
|
16
|
-
scores.sort.reverse.each do |score,count|
|
17
|
-
while !s_thr.empty? and score < s_thr.last[0]
|
18
|
-
thr_cnts.push(cnt)
|
19
|
-
s_thr.pop
|
20
|
-
end
|
21
|
-
cnt += count
|
22
|
-
end
|
23
|
-
s_thr = thresholds.map.with_index{|threshold,index|[threshold,index]}.sort_by{|threshold,index| threshold}
|
24
|
-
while thr_cnts.size < s_thr.size
|
25
|
-
thr_cnts.push(cnt)
|
26
|
-
end
|
27
|
-
s_thr.reverse.zip(thr_cnts).sort_by{|(threshold,index), count| index}.map{|(threshold,index), count| count.to_f}
|
28
10
|
end
|
29
11
|
|
30
12
|
def pvalue_by_threshold(threshold)
|
31
|
-
counts_by_thresholds(threshold).first /
|
13
|
+
counts_by_thresholds(threshold).first / vocabulary_volume
|
32
14
|
end
|
33
15
|
end
|
34
16
|
end
|
data/lib/macroape/exec/eval_alignment.rb
CHANGED
@@ -24,7 +24,7 @@ Examples:
|
|
24
24
|
or in linux
|
25
25
|
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
26
26
|
}
|
27
|
-
|
27
|
+
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
28
28
|
require 'macroape'
|
29
29
|
|
30
30
|
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
@@ -69,9 +69,9 @@ begin
|
|
69
69
|
when '-d'
|
70
70
|
discretization = ARGV.shift.to_f
|
71
71
|
when '-m'
|
72
|
-
|
72
|
+
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
73
73
|
when '-md'
|
74
|
-
|
74
|
+
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
75
75
|
when '-b'
|
76
76
|
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
77
77
|
when '-b1'
|
@@ -84,35 +84,35 @@ begin
|
|
84
84
|
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
85
85
|
|
86
86
|
|
87
|
-
|
88
|
-
|
87
|
+
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
88
|
+
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
89
89
|
|
90
|
-
if first_file == '.stdin' || second_file == '.stdin'
|
91
|
-
r_stream, w_stream = IO.pipe
|
92
|
-
STDIN.readlines.each{|line| w_stream.write(line)}
|
93
|
-
w_stream.close
|
94
|
-
end
|
90
|
+
# if first_file == '.stdin' || second_file == '.stdin'
|
91
|
+
# r_stream, w_stream = IO.pipe
|
92
|
+
# STDIN.readlines.each{|line| w_stream.write(line)}
|
93
|
+
# w_stream.close
|
94
|
+
# end
|
95
95
|
|
96
96
|
if first_file == '.stdin'
|
97
|
-
r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
98
|
-
pwm_first =
|
97
|
+
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
98
|
+
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
99
99
|
else
|
100
100
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
101
|
-
pwm_first = PWM
|
101
|
+
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
102
102
|
end
|
103
103
|
|
104
104
|
if second_file == '.stdin'
|
105
|
-
r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
106
|
-
pwm_second =
|
105
|
+
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
106
|
+
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
107
107
|
else
|
108
108
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
109
|
-
pwm_second = PWM
|
109
|
+
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
110
110
|
end
|
111
111
|
|
112
|
-
r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
112
|
+
# r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
113
113
|
|
114
114
|
|
115
|
-
pwm_second
|
115
|
+
pwm_second.reverse_complement! if reverse
|
116
116
|
|
117
117
|
first_pwm_alignment = '.' * [-shift, 0].max + '>' * pwm_first.length
|
118
118
|
second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * pwm_second.length
|
@@ -121,7 +121,7 @@ begin
|
|
121
121
|
(first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
|
122
122
|
(second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
|
123
123
|
|
124
|
-
cmp =
|
124
|
+
cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
|
125
125
|
pwm_second.left_augment([shift,0].max))
|
126
126
|
|
127
127
|
first_threshold = pwm_first.threshold(pvalue)
|
data/lib/macroape/exec/eval_similarity.rb
CHANGED
@@ -26,7 +26,9 @@ Examples:
|
|
26
26
|
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
27
27
|
}
|
28
28
|
|
29
|
+
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
29
30
|
require 'macroape'
|
31
|
+
require 'bioinform'
|
30
32
|
|
31
33
|
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
32
34
|
STDERR.puts help_string
|
@@ -51,9 +53,9 @@ begin
|
|
51
53
|
when '-d'
|
52
54
|
discretization = ARGV.shift.to_f
|
53
55
|
when '-m'
|
54
|
-
|
56
|
+
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
55
57
|
when '-md'
|
56
|
-
|
58
|
+
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
57
59
|
when '-b'
|
58
60
|
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
59
61
|
when '-b1'
|
@@ -65,35 +67,35 @@ begin
|
|
65
67
|
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
|
66
68
|
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
67
69
|
|
68
|
-
|
69
|
-
|
70
|
+
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
71
|
+
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
70
72
|
|
71
73
|
|
72
|
-
if first_file == '.stdin' || second_file == '.stdin'
|
73
|
-
r_stream, w_stream = IO.pipe
|
74
|
-
STDIN.readlines.each{|line| w_stream.write(line)}
|
75
|
-
w_stream.close
|
76
|
-
end
|
74
|
+
# if first_file == '.stdin' || second_file == '.stdin'
|
75
|
+
# r_stream, w_stream = IO.pipe
|
76
|
+
# STDIN.readlines.each{|line| w_stream.write(line)}
|
77
|
+
# w_stream.close
|
78
|
+
# end
|
77
79
|
|
78
80
|
if first_file == '.stdin'
|
79
|
-
r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
80
|
-
pwm_first =
|
81
|
+
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
82
|
+
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
81
83
|
else
|
82
84
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
83
|
-
pwm_first = PWM
|
85
|
+
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
84
86
|
end
|
85
87
|
|
86
88
|
if second_file == '.stdin'
|
87
|
-
r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
88
|
-
pwm_second =
|
89
|
+
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
90
|
+
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
89
91
|
else
|
90
92
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
91
|
-
pwm_second = PWM
|
93
|
+
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
92
94
|
end
|
93
95
|
|
94
96
|
r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
95
97
|
|
96
|
-
cmp =
|
98
|
+
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
|
97
99
|
|
98
100
|
first_threshold = pwm_first.threshold(pvalue)
|
99
101
|
second_threshold = pwm_second.threshold(pvalue)
|
data/lib/macroape/exec/find_pvalue.rb
CHANGED
@@ -24,7 +24,9 @@ Examples:
|
|
24
24
|
cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
|
25
25
|
}
|
26
26
|
|
27
|
+
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
27
28
|
require 'macroape'
|
29
|
+
require 'bioinform'
|
28
30
|
|
29
31
|
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
30
32
|
STDERR.puts help_string
|
@@ -56,22 +58,22 @@ begin
|
|
56
58
|
when '-d'
|
57
59
|
discretization = ARGV.shift.to_f
|
58
60
|
when '-m'
|
59
|
-
|
61
|
+
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
60
62
|
end
|
61
63
|
end
|
62
|
-
|
64
|
+
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
63
65
|
|
64
66
|
|
65
67
|
if filename == '.stdin'
|
66
|
-
|
68
|
+
# TODO
|
67
69
|
else
|
68
70
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
69
|
-
pwm = PWM
|
71
|
+
pwm = Bioinform::PWM.new( File.read(filename) )
|
70
72
|
end
|
71
|
-
pwm
|
73
|
+
pwm.background(background)
|
72
74
|
|
73
75
|
counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
74
|
-
pvalues = counts.map{|count| count.to_f / pwm.
|
76
|
+
pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
|
75
77
|
pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
|
76
78
|
puts "#{threshold}\t#{count}\t#{pvalue}"
|
77
79
|
}
|