macroape 3.2.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. data/Rakefile +28 -7
  2. data/lib/macroape.rb +14 -26
  3. data/lib/macroape/aligned_pair_intersection.rb +24 -24
  4. data/lib/macroape/collection.rb +1 -2
  5. data/lib/macroape/count_by_threshold.rb +8 -26
  6. data/lib/macroape/exec/eval_alignment.rb +19 -19
  7. data/lib/macroape/exec/eval_similarity.rb +18 -16
  8. data/lib/macroape/exec/find_pvalue.rb +8 -6
  9. data/lib/macroape/exec/find_threshold.rb +7 -5
  10. data/lib/macroape/exec/preprocess_collection.rb +10 -7
  11. data/lib/macroape/exec/scan_collection.rb +13 -10
  12. data/lib/macroape/pwm_compare.rb +33 -2
  13. data/lib/macroape/pwm_compare_aligned.rb +38 -2
  14. data/lib/macroape/threshold_by_pvalue.rb +48 -43
  15. data/lib/macroape/version.rb +3 -3
  16. data/macroape.gemspec +2 -0
  17. data/test/data/test_collection.yaml +70 -4
  18. data/test/eval_alignment_similarity_test.rb +19 -0
  19. data/test/eval_similarity_test.rb +26 -0
  20. data/test/find_pvalue_test.rb +25 -0
  21. data/test/find_threshold_test.rb +29 -0
  22. data/test/preprocess_collection_test.rb +9 -0
  23. data/test/scan_collection_test.rb +17 -0
  24. data/test/test_helper.rb +10 -0
  25. metadata +33 -16
  26. data/lib/macroape/aligned_pair_metrics.rb +0 -24
  27. data/lib/macroape/aligned_pair_transformations.rb +0 -23
  28. data/lib/macroape/extract_pwm.rb +0 -32
  29. data/lib/macroape/gauss_estimation.rb +0 -30
  30. data/lib/macroape/matrix_information.rb +0 -29
  31. data/lib/macroape/matrix_on_background.rb +0 -16
  32. data/lib/macroape/matrix_transformations.rb +0 -29
  33. data/lib/macroape/pair_metrics.rb +0 -9
  34. data/lib/macroape/pair_transformations.rb +0 -28
  35. data/lib/macroape/single_matrix.rb +0 -45
  36. data/lib/macroape/support.rb +0 -34
  37. data/test/macroape_test.rb +0 -125
data/Rakefile CHANGED
@@ -1,7 +1,28 @@
1
- #!/usr/bin/env rake
2
- require "bundler/gem_tasks"
3
-
4
- desc 'Test all functionality of gem executables'
5
- task :test do
6
- system("ruby test/macroape_test.rb")
7
- end
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ namespace :spec do
5
+ task :find_threshold do
6
+ system("ruby -I ./test test/find_threshold_test.rb")
7
+ end
8
+ task :find_pvalue do
9
+ system("ruby -I ./test test/find_pvalue_test.rb")
10
+ end
11
+ task :eval_similarity do
12
+ system("ruby -I ./test test/eval_similarity_test.rb")
13
+ end
14
+ task :eval_alignment_similarity do
15
+ system("ruby -I ./test test/eval_alignment_similarity_test.rb")
16
+ end
17
+ task :preprocess_collection do
18
+ system("ruby -I ./test test/preprocess_collection_test.rb")
19
+ end
20
+ task :scan_collection do
21
+ system("ruby -I ./test test/scan_collection_test.rb")
22
+ end
23
+ task :all => [:find_threshold, :find_pvalue, :eval_similarity,
24
+ :eval_alignment_similarity, :scan_collection, :preprocess_collection]
25
+ end
26
+
27
+ desc 'Test all functionality of gem executables'
28
+ task :spec => ['spec:all']
data/lib/macroape.rb CHANGED
@@ -1,26 +1,14 @@
1
- require 'macroape/version'
2
- require 'yaml'
3
-
4
- require 'macroape/extract_pwm'
5
- require 'macroape/support'
6
- require 'macroape/matrix_transformations'
7
- require 'macroape/matrix_information'
8
- require 'macroape/gauss_estimation'
9
- require 'macroape/threshold_by_pvalue'
10
- require 'macroape/single_matrix'
11
- require 'macroape/count_by_threshold'
12
- require 'macroape/matrix_on_background'
13
-
14
- require 'macroape/aligned_pair_transformations'
15
- require 'macroape/aligned_pair_metrics'
16
- require 'macroape/aligned_pair_intersection'
17
- require 'macroape/pwm_compare_aligned'
18
-
19
- require 'macroape/pair_transformations'
20
- require 'macroape/pair_metrics'
21
- require 'macroape/pwm_compare'
22
- require 'macroape/collection'
23
-
24
- module Macroape
25
- # Your code goes here...
26
- end
1
+ require 'macroape/version'
2
+
3
+ require 'bioinform'
4
+ require 'macroape/threshold_by_pvalue'
5
+ require 'macroape/count_by_threshold'
6
+
7
+ require 'macroape/aligned_pair_intersection'
8
+ require 'macroape/pwm_compare_aligned'
9
+ require 'macroape/pwm_compare'
10
+ require 'macroape/collection'
11
+
12
+ module Macroape
13
+ # Your code goes here...
14
+ end
@@ -1,9 +1,9 @@
1
- module PWMCompare
2
- module AlignedPairIntersection
1
+ module Macroape
2
+ class PWMCompareAligned
3
3
 
4
4
  def counts_for_two_matrices(threshold_first, threshold_second)
5
- if first.probabilities == second.probabilities
6
- if first.probabilities == [1,1,1,1]
5
+ if first.background == second.background
6
+ if first.background == [1,1,1,1]
7
7
  common_words_for_two_matrices(threshold_first, threshold_second)
8
8
  else
9
9
  counts_for_two_matrices_with_same_probabilities(threshold_first, threshold_second)
@@ -18,8 +18,8 @@ module PWMCompare
18
18
  result_first = 0.0
19
19
  result_second = 0.0
20
20
  length.times do |column|
21
- ending_weight_first = first.sum_of_probabilities ** (length - column - 1)
22
- ending_weight_second = second.sum_of_probabilities ** (length - column - 1)
21
+ ending_weight_first = first.background_sum ** (length - column - 1)
22
+ ending_weight_second = second.background_sum ** (length - column - 1)
23
23
  already_enough_first = threshold_first - first.worst_suffix[column + 1]
24
24
  already_enough_second = threshold_second - second.worst_suffix[column + 1]
25
25
  least_sufficient_first = threshold_first - first.best_suffix[column + 1]
@@ -33,23 +33,23 @@ module PWMCompare
33
33
  if new_score_first >= already_enough_first
34
34
  new_score_second = score_second + second.matrix[column][letter]
35
35
  if new_score_second >= already_enough_second
36
- result_first += count[0] * first.probabilities[letter] * ending_weight_first
37
- result_second += count[1] * second.probabilities[letter] * ending_weight_second
36
+ result_first += count[0] * first.background[letter] * ending_weight_first
37
+ result_second += count[1] * second.background[letter] * ending_weight_second
38
38
  elsif new_score_second >= least_sufficient_second
39
- new_scores[new_score_first][new_score_second][0] += count[0] * first.probabilities[letter]
40
- new_scores[new_score_first][new_score_second][1] += count[1] * second.probabilities[letter]
39
+ new_scores[new_score_first][new_score_second][0] += count[0] * first.background[letter]
40
+ new_scores[new_score_first][new_score_second][1] += count[1] * second.background[letter]
41
41
  end
42
42
  elsif new_score_first >= least_sufficient_first
43
43
  new_score_second = score_second + second.matrix[column][letter]
44
44
  if new_score_second >= least_sufficient_second
45
- new_scores[new_score_first][new_score_second][0] += count[0] * first.probabilities[letter]
46
- new_scores[new_score_first][new_score_second][1] += count[1] * second.probabilities[letter]
45
+ new_scores[new_score_first][new_score_second][0] += count[0] * first.background[letter]
46
+ new_scores[new_score_first][new_score_second][1] += count[1] * second.background[letter]
47
47
  end
48
48
  end
49
49
  end
50
50
  end
51
51
  end
52
- raise 'Hash overflow in PWMCompare::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSize
52
+ raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
53
53
  scores = new_scores
54
54
  end
55
55
  [result_first, result_second]
@@ -58,9 +58,9 @@ module PWMCompare
58
58
  def counts_for_two_matrices_with_same_probabilities(threshold_first, threshold_second)
59
59
  scores = { 0 => {0 => 1} } # scores_on_first_pwm, scores_on_second_pwm --> count_on_first_probabilities, count_on_second_probabilities
60
60
  result = 0.0
61
- probabilities = first.probabilities
61
+ background = first.background
62
62
  length.times do |column|
63
- ending_weight = first.sum_of_probabilities ** (length - column - 1)
63
+ ending_weight = first.background_sum ** (length - column - 1)
64
64
  already_enough_first = threshold_first - first.worst_suffix[column + 1]
65
65
  already_enough_second = threshold_second - second.worst_suffix[column + 1]
66
66
  least_sufficient_first = threshold_first - first.best_suffix[column + 1]
@@ -74,20 +74,20 @@ module PWMCompare
74
74
  if new_score_first >= already_enough_first
75
75
  new_score_second = score_second + second.matrix[column][letter]
76
76
  if new_score_second >= already_enough_second
77
- result += count * probabilities[letter] * ending_weight
77
+ result += count * background[letter] * ending_weight
78
78
  elsif new_score_second >= least_sufficient_second
79
- new_scores[new_score_first][new_score_second] += count * probabilities[letter]
79
+ new_scores[new_score_first][new_score_second] += count * background[letter]
80
80
  end
81
81
  elsif new_score_first >= least_sufficient_first
82
82
  new_score_second = score_second + second.matrix[column][letter]
83
83
  if new_score_second >= least_sufficient_second
84
- new_scores[new_score_first][new_score_second] += count * probabilities[letter]
84
+ new_scores[new_score_first][new_score_second] += count * background[letter]
85
85
  end
86
86
  end
87
87
  end
88
88
  end
89
89
  end
90
- raise 'Hash overflow in PWMCompare::AlignedPairIntersection#counts_for_two_matrices_with_same_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSize
90
+ raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_same_probabilities' if new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
91
91
  scores = new_scores
92
92
  end
93
93
  [result, result]
@@ -97,7 +97,7 @@ module PWMCompare
97
97
  def common_words_for_two_matrices(threshold_first, threshold_second)
98
98
  scores = { 0 => {0 => 1} } # scores_on_first_pwm, scores_on_second_pwm --> count_on_first_probabilities, count_on_second_probabilities
99
99
  result = 0
100
- length.times do |column|
100
+ length.times do |column|
101
101
  ending_weight = 4 ** (length - column - 1)
102
102
  already_enough_first = threshold_first - first.worst_suffix[column + 1]
103
103
  already_enough_second = threshold_second - second.worst_suffix[column + 1]
@@ -107,13 +107,13 @@ module PWMCompare
107
107
  new_scores = Hash.new{|h,k| h[k]=Hash.new{|h2,k2| h2[k2]=0} }
108
108
  scores.each do |score_first, second_scores|
109
109
  second_scores.each do |score_second, count|
110
- 4.times do |letter|
110
+ 4.times do |letter|
111
111
  new_score_first = score_first + first.matrix[column][letter]
112
- if new_score_first >= already_enough_first
112
+ if new_score_first >= already_enough_first
113
113
  new_score_second = score_second + second.matrix[column][letter]
114
114
  if new_score_second >= already_enough_second
115
115
  result += count * ending_weight
116
- elsif new_score_second >= least_sufficient_second
116
+ elsif new_score_second >= least_sufficient_second
117
117
  new_scores[new_score_first][new_score_second] += count
118
118
  end
119
119
  elsif new_score_first >= least_sufficient_first
@@ -126,7 +126,7 @@ module PWMCompare
126
126
  end
127
127
  end
128
128
 
129
- raise 'Hash overflow in PWMCompare::AlignedPairIntersection#common_words_for_two_matrices' if defined? MaxHashSize and new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSize
129
+ raise 'Hash overflow in Macroape::AlignedPairIntersection#common_words_for_two_matrices' if defined? MaxHashSizeDouble and new_scores.inject(0){|sum,hsh|sum+hsh.size} > MaxHashSizeDouble
130
130
  scores = new_scores
131
131
  end
132
132
  [result, result]
@@ -1,4 +1,4 @@
1
- module PWM
1
+ module Macroape
2
2
  class Collection
3
3
  attr_reader :rough_discretization, :precise_discretization, :background, :pvalues, :pwms, :infos
4
4
  def initialize(rough_discretization, precise_discretization, background, pvalues)
@@ -7,7 +7,6 @@ module PWM
7
7
  @infos={}
8
8
  end
9
9
  def add_pwm(pwm,info)
10
- #@pwms[pwm] = info
11
10
  @pwms[pwm.name] = pwm
12
11
  @infos[pwm.name] = info
13
12
  end
@@ -1,34 +1,16 @@
1
- module PWM
2
- module CountByThreshold
1
+ require 'macroape/threshold_by_pvalue'
2
+
3
+ module Bioinform
4
+ class PWM
3
5
  def counts_by_thresholds(*thresholds)
4
- scores = calculate_count_distribution_after_threshold(thresholds.min)
5
- =begin
6
- thresholds.map{ |threshold|
7
- #scores.select{|score,count| score >= threshold}.map{|score,count| count}.inject(0){|sum,val|sum+val}
8
- scores.inject(0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
6
+ scores = count_distribution_after_threshold(thresholds.min)
7
+ thresholds.map{ |threshold|
8
+ scores.inject(0.0){|sum,(score,count)| (score >= threshold) ? sum + count : sum}
9
9
  }
10
- =end
11
- s_thr= thresholds.map.with_index{|threshold,index|[threshold,index]}.sort_by{|threshold,index| threshold}
12
-
13
- cnt = 0
14
- thr_cnts=[]
15
-
16
- scores.sort.reverse.each do |score,count|
17
- while !s_thr.empty? and score < s_thr.last[0]
18
- thr_cnts.push(cnt)
19
- s_thr.pop
20
- end
21
- cnt += count
22
- end
23
- s_thr = thresholds.map.with_index{|threshold,index|[threshold,index]}.sort_by{|threshold,index| threshold}
24
- while thr_cnts.size < s_thr.size
25
- thr_cnts.push(cnt)
26
- end
27
- s_thr.reverse.zip(thr_cnts).sort_by{|(threshold,index), count| index}.map{|(threshold,index), count| count.to_f}
28
10
  end
29
11
 
30
12
  def pvalue_by_threshold(threshold)
31
- counts_by_thresholds(threshold).first / number_of_words
13
+ counts_by_thresholds(threshold).first / vocabulary_volume
32
14
  end
33
15
  end
34
16
  end
@@ -24,7 +24,7 @@ Examples:
24
24
  or in linux
25
25
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
26
26
  }
27
-
27
+ $:.unshift File.join(File.dirname(__FILE__),'./../../')
28
28
  require 'macroape'
29
29
 
30
30
  if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
@@ -69,9 +69,9 @@ begin
69
69
  when '-d'
70
70
  discretization = ARGV.shift.to_f
71
71
  when '-m'
72
- PWM::MaxHashSize = ARGV.shift.to_f
72
+ Macroape::MaxHashSizeSingle = ARGV.shift.to_f
73
73
  when '-md'
74
- PWMCompare::MaxHashSize = ARGV.shift.to_f
74
+ Macroape::MaxHashSizeDouble = ARGV.shift.to_f
75
75
  when '-b'
76
76
  second_background = first_background = ARGV.shift(4).map(&:to_f)
77
77
  when '-b1'
@@ -84,35 +84,35 @@ begin
84
84
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
85
85
 
86
86
 
87
- PWM::MaxHashSize = 1000000 unless defined? PWM::MaxHashSize
88
- PWMCompare::MaxHashSize = 1000 unless defined? PWMCompare::MaxHashSize
87
+ Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
88
+ Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
89
89
 
90
- if first_file == '.stdin' || second_file == '.stdin'
91
- r_stream, w_stream = IO.pipe
92
- STDIN.readlines.each{|line| w_stream.write(line)}
93
- w_stream.close
94
- end
90
+ # if first_file == '.stdin' || second_file == '.stdin'
91
+ # r_stream, w_stream = IO.pipe
92
+ # STDIN.readlines.each{|line| w_stream.write(line)}
93
+ # w_stream.close
94
+ # end
95
95
 
96
96
  if first_file == '.stdin'
97
- r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
- pwm_first = PWM::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
97
+ # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
+ # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
99
99
  else
100
100
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
101
- pwm_first = PWM::SingleMatrix.load_pat(first_file).with_background(first_background).discrete(discretization)
101
+ pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
102
102
  end
103
103
 
104
104
  if second_file == '.stdin'
105
- r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
- pwm_second = PWM::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
105
+ # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
+ # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
107
107
  else
108
108
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
109
- pwm_second = PWM::SingleMatrix.load_pat(second_file).with_background(second_background).discrete(discretization)
109
+ pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
110
110
  end
111
111
 
112
- r_stream.close if first_file == '.stdin' || second_file == '.stdin'
112
+ # r_stream.close if first_file == '.stdin' || second_file == '.stdin'
113
113
 
114
114
 
115
- pwm_second = pwm_second.reverse_complement if reverse
115
+ pwm_second.reverse_complement! if reverse
116
116
 
117
117
  first_pwm_alignment = '.' * [-shift, 0].max + '>' * pwm_first.length
118
118
  second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * pwm_second.length
@@ -121,7 +121,7 @@ begin
121
121
  (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
122
122
  (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
123
123
 
124
- cmp = PWMCompare::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
124
+ cmp = Macroape::PWMCompareAligned.new(pwm_first.left_augment([-shift,0].max),
125
125
  pwm_second.left_augment([shift,0].max))
126
126
 
127
127
  first_threshold = pwm_first.threshold(pvalue)
@@ -26,7 +26,9 @@ Examples:
26
26
  cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
27
27
  }
28
28
 
29
+ $:.unshift File.join(File.dirname(__FILE__),'./../../')
29
30
  require 'macroape'
31
+ require 'bioinform'
30
32
 
31
33
  if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
32
34
  STDERR.puts help_string
@@ -51,9 +53,9 @@ begin
51
53
  when '-d'
52
54
  discretization = ARGV.shift.to_f
53
55
  when '-m'
54
- PWM::MaxHashSize = ARGV.shift.to_f
56
+ Macroape::MaxHashSizeSingle = ARGV.shift.to_f
55
57
  when '-md'
56
- PWMCompare::MaxHashSize = ARGV.shift.to_f
58
+ Macroape::MaxHashSizeDouble = ARGV.shift.to_f
57
59
  when '-b'
58
60
  second_background = first_background = ARGV.shift(4).map(&:to_f)
59
61
  when '-b1'
@@ -65,35 +67,35 @@ begin
65
67
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
66
68
  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
67
69
 
68
- PWM::MaxHashSize = 1000000 unless defined? PWM::MaxHashSize
69
- PWMCompare::MaxHashSize = 1000 unless defined? PWMCompare::MaxHashSize
70
+ Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
71
+ Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
70
72
 
71
73
 
72
- if first_file == '.stdin' || second_file == '.stdin'
73
- r_stream, w_stream = IO.pipe
74
- STDIN.readlines.each{|line| w_stream.write(line)}
75
- w_stream.close
76
- end
74
+ # if first_file == '.stdin' || second_file == '.stdin'
75
+ # r_stream, w_stream = IO.pipe
76
+ # STDIN.readlines.each{|line| w_stream.write(line)}
77
+ # w_stream.close
78
+ # end
77
79
 
78
80
  if first_file == '.stdin'
79
- r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
80
- pwm_first = PWM::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
81
+ # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
82
+ # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
81
83
  else
82
84
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
83
- pwm_first = PWM::SingleMatrix.load_pat(first_file).with_background(first_background).discrete(discretization)
85
+ pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
84
86
  end
85
87
 
86
88
  if second_file == '.stdin'
87
- r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
88
- pwm_second = PWM::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
89
+ # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
90
+ # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
89
91
  else
90
92
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
91
- pwm_second = PWM::SingleMatrix.load_pat(second_file).with_background(second_background).discrete(discretization)
93
+ pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
92
94
  end
93
95
 
94
96
  r_stream.close if first_file == '.stdin' || second_file == '.stdin'
95
97
 
96
- cmp = PWMCompare::PWMCompare.new(pwm_first, pwm_second)
98
+ cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
97
99
 
98
100
  first_threshold = pwm_first.threshold(pvalue)
99
101
  second_threshold = pwm_second.threshold(pvalue)
@@ -24,7 +24,9 @@ Examples:
24
24
  cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
25
25
  }
26
26
 
27
+ $:.unshift File.join(File.dirname(__FILE__),'./../../')
27
28
  require 'macroape'
29
+ require 'bioinform'
28
30
 
29
31
  if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
30
32
  STDERR.puts help_string
@@ -56,22 +58,22 @@ begin
56
58
  when '-d'
57
59
  discretization = ARGV.shift.to_f
58
60
  when '-m'
59
- PWM::MaxHashSize = ARGV.shift.to_f
61
+ Macroape::MaxHashSizeSingle = ARGV.shift.to_f
60
62
  end
61
63
  end
62
- PWM::MaxHashSize = 1000000 unless defined? PWM::MaxHashSize
64
+ Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
63
65
 
64
66
 
65
67
  if filename == '.stdin'
66
- pwm = PWM::SingleMatrix.load_from_stdin(STDIN)
68
+ # TODO
67
69
  else
68
70
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
69
- pwm = PWM::SingleMatrix.load_pat(filename)
71
+ pwm = Bioinform::PWM.new( File.read(filename) )
70
72
  end
71
- pwm = pwm.with_background(background)
73
+ pwm.background(background)
72
74
 
73
75
  counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
74
- pvalues = counts.map{|count| count.to_f / pwm.number_of_words}
76
+ pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
75
77
  pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
76
78
  puts "#{threshold}\t#{count}\t#{pvalue}"
77
79
  }