macroape 3.3.3 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +1 -0
  2. data/Rakefile.rb +7 -22
  3. data/TODO.txt +7 -6
  4. data/bin/align_motifs +4 -0
  5. data/bin/eval_alignment +2 -1
  6. data/bin/eval_similarity +2 -1
  7. data/bin/find_pvalue +2 -1
  8. data/bin/find_threshold +2 -1
  9. data/bin/preprocess_collection +2 -1
  10. data/bin/scan_collection +2 -1
  11. data/lib/macroape/aligned_pair_intersection.rb +2 -3
  12. data/lib/macroape/cli/align_motifs.rb +49 -0
  13. data/lib/macroape/cli/eval_alignment.rb +124 -0
  14. data/lib/macroape/cli/eval_similarity.rb +107 -0
  15. data/lib/macroape/cli/find_pvalue.rb +89 -0
  16. data/lib/macroape/cli/find_threshold.rb +84 -0
  17. data/lib/macroape/cli/preprocess_collection.rb +123 -0
  18. data/lib/macroape/cli/scan_collection.rb +141 -0
  19. data/lib/macroape/cli.rb +5 -0
  20. data/lib/macroape/counting.rb +15 -1
  21. data/lib/macroape/pwm_compare.rb +21 -1
  22. data/lib/macroape/pwm_compare_aligned.rb +21 -0
  23. data/lib/macroape/version.rb +1 -1
  24. data/macroape.gemspec +1 -1
  25. data/test/align_motifs_test.rb +12 -0
  26. data/test/data/KLF3_f1.pat +16 -0
  27. data/test/data/KLF3_f1.pcm +16 -0
  28. data/test/data/KLF4_f2.pcm +11 -0
  29. data/test/data/SP1_f1.pat +11 -11
  30. data/test/data/SP1_f1.pcm +12 -0
  31. data/test/data/SP1_f1_revcomp.pat +11 -11
  32. data/test/data/SP1_f1_revcomp.pcm +12 -0
  33. data/test/data/test_collection/SP1_f1.pat +11 -11
  34. data/test/data/test_collection.yaml +49 -109
  35. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
  36. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
  37. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
  38. data/test/data/test_collection_single_file.txt +38 -0
  39. data/test/data/test_collection_single_file_pcm.txt +38 -0
  40. data/test/eval_alignment_test.rb +31 -0
  41. data/test/eval_similarity_test.rb +28 -13
  42. data/test/find_pvalue_test.rb +10 -13
  43. data/test/find_threshold_test.rb +10 -5
  44. data/test/preprocess_collection_test.rb +36 -2
  45. data/test/scan_collection_test.rb +9 -4
  46. data/test/test_helper.rb +61 -2
  47. metadata +38 -12
  48. data/lib/macroape/exec/eval_alignment.rb +0 -125
  49. data/lib/macroape/exec/eval_similarity.rb +0 -108
  50. data/lib/macroape/exec/find_pvalue.rb +0 -81
  51. data/lib/macroape/exec/find_threshold.rb +0 -77
  52. data/lib/macroape/exec/preprocess_collection.rb +0 -101
  53. data/lib/macroape/exec/scan_collection.rb +0 -124
  54. data/test/eval_alignment_similarity_test.rb +0 -20
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.3.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-19 00:00:00.000000000 Z
12
+ date: 2012-09-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.2
21
+ version: 0.1.5
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.2
29
+ version: 0.1.5
30
30
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
31
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
32
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -35,6 +35,7 @@ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-
35
35
  email:
36
36
  - prijutme4ty@gmail.com
37
37
  executables:
38
+ - align_motifs
38
39
  - eval_alignment
39
40
  - eval_similarity
40
41
  - find_pvalue
@@ -51,6 +52,7 @@ files:
51
52
  - Rakefile.rb
52
53
  - TODO.txt
53
54
  - benchmark/similarity_benchmark.rb
55
+ - bin/align_motifs
54
56
  - bin/eval_alignment
55
57
  - bin/eval_similarity
56
58
  - bin/find_pvalue
@@ -59,32 +61,45 @@ files:
59
61
  - bin/scan_collection
60
62
  - lib/macroape.rb
61
63
  - lib/macroape/aligned_pair_intersection.rb
64
+ - lib/macroape/cli.rb
65
+ - lib/macroape/cli/align_motifs.rb
66
+ - lib/macroape/cli/eval_alignment.rb
67
+ - lib/macroape/cli/eval_similarity.rb
68
+ - lib/macroape/cli/find_pvalue.rb
69
+ - lib/macroape/cli/find_threshold.rb
70
+ - lib/macroape/cli/preprocess_collection.rb
71
+ - lib/macroape/cli/scan_collection.rb
62
72
  - lib/macroape/collection.rb
63
73
  - lib/macroape/counting.rb
64
- - lib/macroape/exec/eval_alignment.rb
65
- - lib/macroape/exec/eval_similarity.rb
66
- - lib/macroape/exec/find_pvalue.rb
67
- - lib/macroape/exec/find_threshold.rb
68
- - lib/macroape/exec/preprocess_collection.rb
69
- - lib/macroape/exec/scan_collection.rb
70
74
  - lib/macroape/pwm_compare.rb
71
75
  - lib/macroape/pwm_compare_aligned.rb
72
76
  - lib/macroape/version.rb
73
77
  - macroape.gemspec
74
78
  - spec/count_distribution_spec.rb
75
79
  - spec/spec_helper.rb
80
+ - test/align_motifs_test.rb
76
81
  - test/data/AHR_si.pat
82
+ - test/data/KLF3_f1.pat
83
+ - test/data/KLF3_f1.pcm
77
84
  - test/data/KLF4_f2.pat
85
+ - test/data/KLF4_f2.pcm
78
86
  - test/data/KLF4_f2_scan_results_all.txt
79
87
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
80
88
  - test/data/KLF4_f2_scan_results_precise_mode.txt
81
89
  - test/data/SP1_f1.pat
90
+ - test/data/SP1_f1.pcm
82
91
  - test/data/SP1_f1_revcomp.pat
92
+ - test/data/SP1_f1_revcomp.pcm
83
93
  - test/data/test_collection.yaml
84
94
  - test/data/test_collection/GABPA_f1.pat
85
95
  - test/data/test_collection/KLF4_f2.pat
86
96
  - test/data/test_collection/SP1_f1.pat
87
- - test/eval_alignment_similarity_test.rb
97
+ - test/data/test_collection_pcm/GABPA_f1.pcm
98
+ - test/data/test_collection_pcm/KLF4_f2.pcm
99
+ - test/data/test_collection_pcm/SP1_f1.pcm
100
+ - test/data/test_collection_single_file.txt
101
+ - test/data/test_collection_single_file_pcm.txt
102
+ - test/eval_alignment_test.rb
88
103
  - test/eval_similarity_test.rb
89
104
  - test/find_pvalue_test.rb
90
105
  - test/find_threshold_test.rb
@@ -118,18 +133,29 @@ summary: PWM comparison tool using MACROAPE approach
118
133
  test_files:
119
134
  - spec/count_distribution_spec.rb
120
135
  - spec/spec_helper.rb
136
+ - test/align_motifs_test.rb
121
137
  - test/data/AHR_si.pat
138
+ - test/data/KLF3_f1.pat
139
+ - test/data/KLF3_f1.pcm
122
140
  - test/data/KLF4_f2.pat
141
+ - test/data/KLF4_f2.pcm
123
142
  - test/data/KLF4_f2_scan_results_all.txt
124
143
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
125
144
  - test/data/KLF4_f2_scan_results_precise_mode.txt
126
145
  - test/data/SP1_f1.pat
146
+ - test/data/SP1_f1.pcm
127
147
  - test/data/SP1_f1_revcomp.pat
148
+ - test/data/SP1_f1_revcomp.pcm
128
149
  - test/data/test_collection.yaml
129
150
  - test/data/test_collection/GABPA_f1.pat
130
151
  - test/data/test_collection/KLF4_f2.pat
131
152
  - test/data/test_collection/SP1_f1.pat
132
- - test/eval_alignment_similarity_test.rb
153
+ - test/data/test_collection_pcm/GABPA_f1.pcm
154
+ - test/data/test_collection_pcm/KLF4_f2.pcm
155
+ - test/data/test_collection_pcm/SP1_f1.pcm
156
+ - test/data/test_collection_single_file.txt
157
+ - test/data/test_collection_single_file_pcm.txt
158
+ - test/eval_alignment_test.rb
133
159
  - test/eval_similarity_test.rb
134
160
  - test/find_pvalue_test.rb
135
161
  - test/find_threshold_test.rb
@@ -1,125 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby eval_alignment.rb <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
4
- type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
5
- or in linux
6
- cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
7
-
8
- Options:
9
- [-p <P-value>]
10
- [-d <discretization level>]
11
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
12
-
13
- Output format:
14
- <jaccard similarity coefficient>
15
- <number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the given alignment>
16
- <aligned 1st matrix>
17
- <aligned 2nd matrix>
18
- <shift> <orientation>
19
-
20
- Examples:
21
- ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
22
- or on windows
23
- type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
24
- or in linux
25
- cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
26
- }
27
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
28
- require 'macroape'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- pvalue = 0.0005
36
- discretization = 10
37
-
38
- first_background = [1,1,1,1]
39
- second_background = [1,1,1,1]
40
-
41
- begin
42
- first_file = ARGV.shift
43
- second_file = ARGV.shift
44
-
45
- shift = ARGV.shift
46
- orientation = ARGV.shift
47
-
48
- raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
49
- raise 'You\'d specify shift' unless shift
50
- raise 'You\'d specify orientation' unless orientation
51
-
52
- shift = shift.to_i
53
- orientation = orientation.to_sym
54
-
55
- case orientation
56
- when :direct
57
- reverse = false
58
- when :revcomp
59
- reverse = true
60
- else
61
- raise 'Unknown orientation(direct/revcomp)'
62
- end
63
-
64
-
65
- until ARGV.empty?
66
- case ARGV.shift
67
- when '-p'
68
- pvalue = ARGV.shift.to_f
69
- when '-d'
70
- discretization = ARGV.shift.to_f
71
- when '-m'
72
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
73
- when '-md'
74
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
75
- when '-b'
76
- second_background = first_background = ARGV.shift(4).map(&:to_f)
77
- when '-b1'
78
- first_background = ARGV.shift(4).map(&:to_f)
79
- when '-b2'
80
- second_background = ARGV.shift(4).map(&:to_f)
81
- end
82
- end
83
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
84
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
85
-
86
-
87
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
88
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
89
-
90
- # if first_file == '.stdin' || second_file == '.stdin'
91
- # r_stream, w_stream = IO.pipe
92
- # STDIN.readlines.each{|line| w_stream.write(line)}
93
- # w_stream.close
94
- # end
95
-
96
- if first_file == '.stdin'
97
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
- # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
99
- else
100
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
101
- pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
102
- end
103
-
104
- if second_file == '.stdin'
105
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
- # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
107
- else
108
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
109
- pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
110
- end
111
-
112
- # r_stream.close if first_file == '.stdin' || second_file == '.stdin'
113
-
114
- cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
115
-
116
- first_threshold = pwm_first.threshold(pvalue)
117
- second_threshold = pwm_second.threshold(pvalue)
118
-
119
- info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
120
-
121
- puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
122
-
123
- rescue => err
124
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
125
- end
@@ -1,108 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby eval_similarity.rb <1st matrix pat-file> <2nd matrix pat-file> [options]
4
- or on windows
5
- type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
6
- or in linux
7
- cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
8
-
9
- Options:
10
- [-p <P-value>]
11
- [-d <discretization level>]
12
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
13
-
14
- Output has format:
15
- <jaccard similarity coefficient>
16
- <number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the optimal alignment>
17
- <optimal alignment, the 1st matrix>
18
- <optimal alignment, the 2nd matrix>
19
- <shift> <orientation>
20
-
21
- Examples:
22
- ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
23
- or on windows
24
- type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
25
- or in linux
26
- cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
27
- }
28
-
29
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
30
- require 'macroape'
31
-
32
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
33
- STDERR.puts help_string
34
- exit
35
- end
36
-
37
- pvalue = 0.0005
38
- discretization = 10
39
-
40
- first_background = [1,1,1,1]
41
- second_background = [1,1,1,1]
42
-
43
- begin
44
- first_file = ARGV.shift
45
- second_file = ARGV.shift
46
- raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
47
-
48
- until ARGV.empty?
49
- case ARGV.shift
50
- when '-p'
51
- pvalue = ARGV.shift.to_f
52
- when '-d'
53
- discretization = ARGV.shift.to_f
54
- when '-m'
55
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
56
- when '-md'
57
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
58
- when '-b'
59
- second_background = first_background = ARGV.shift(4).map(&:to_f)
60
- when '-b1'
61
- first_background = ARGV.shift(4).map(&:to_f)
62
- when '-b2'
63
- second_background = ARGV.shift(4).map(&:to_f)
64
- end
65
- end
66
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
67
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
68
-
69
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
70
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
71
-
72
-
73
- # if first_file == '.stdin' || second_file == '.stdin'
74
- # r_stream, w_stream = IO.pipe
75
- # STDIN.readlines.each{|line| w_stream.write(line)}
76
- # w_stream.close
77
- # end
78
-
79
- if first_file == '.stdin'
80
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
81
- # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
82
- else
83
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
84
- pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
85
- end
86
-
87
- if second_file == '.stdin'
88
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
89
- # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
90
- else
91
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
92
- pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
93
- end
94
-
95
- r_stream.close if first_file == '.stdin' || second_file == '.stdin'
96
-
97
- cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
98
-
99
- first_threshold = pwm_first.threshold(pvalue)
100
- second_threshold = pwm_second.threshold(pvalue)
101
-
102
- info = cmp.jaccard(first_threshold, second_threshold)
103
-
104
- puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
105
-
106
- rescue => err
107
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
108
- end
@@ -1,81 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby find_pvalue.rb <pat-file> <threshold list> [options]
4
- or in linux
5
- cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
6
- or on windows
7
- type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
8
-
9
- Options:
10
- [-d <discretization level>]
11
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
12
-
13
- Output format:
14
- threshold_1 count_1 pvalue_1
15
- threshold_2 count_2 pvalue_2
16
- threshold_3 count_3 pvalue_3
17
- The results are printed out in the same order as in the given threshold list.
18
-
19
- Examples:
20
- ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
21
- or on windows
22
- type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
23
- or in linux
24
- cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
25
- }
26
-
27
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
28
- require 'macroape'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- discretization = 10000
36
- background = [1,1,1,1]
37
- thresholds = []
38
- begin
39
- filename = ARGV.shift
40
-
41
- loop do
42
- begin
43
- Float(ARGV.first)
44
- thresholds << ARGV.shift.to_f
45
- rescue
46
- raise StopIteration
47
- end
48
- end
49
-
50
- raise "No input. You'd specify input source: filename or .stdin" unless filename
51
- raise 'You should specify at least one threshold' if thresholds.empty?
52
-
53
- until ARGV.empty?
54
- case ARGV.shift
55
- when '-b'
56
- background = ARGV.shift(4).map(&:to_f)
57
- when '-d'
58
- discretization = ARGV.shift.to_f
59
- when '-m'
60
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
61
- end
62
- end
63
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
64
-
65
-
66
- if filename == '.stdin'
67
- # TODO
68
- else
69
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
70
- pwm = Bioinform::PWM.new( File.read(filename) )
71
- end
72
- pwm.background(background)
73
-
74
- counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
75
- pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
76
- pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
77
- puts "#{threshold}\t#{count}\t#{pvalue}"
78
- }
79
- rescue => err
80
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
81
- end
@@ -1,77 +0,0 @@
1
- help_string = %q{
2
- Command-line format::
3
- ruby find_threshold.rb <pat-file> [options]
4
- or in linux
5
- cat <pat-file> | ruby find_threshold.rb .stdin [options]
6
- or on windows
7
- type <pat-file> | ruby find_threshold.rb .stdin [options]
8
-
9
- Options:
10
- [-p <list of P-values>]
11
- [-d <discretization level>]
12
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
13
-
14
- Output format:
15
- requested_pvalue_1 threshold_1 achieved_pvalue_1
16
- requested_pvalue_2 threshold_2 achieved_pvalue_2
17
-
18
-
19
- Example:
20
- ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
21
- }
22
-
23
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
24
- require 'macroape'
25
-
26
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
27
- STDERR.puts help_string
28
- exit
29
- end
30
-
31
- background = [1,1,1,1]
32
- default_pvalues = [0.0005]
33
- discretization = 10000
34
-
35
- begin
36
- filename = ARGV.shift
37
- raise "No input. You'd specify input source: filename or .stdin" unless filename
38
-
39
- pvalues = []
40
- until ARGV.empty?
41
- case ARGV.shift
42
- when '-b'
43
- background = ARGV.shift(4).map(&:to_f)
44
- when '-m'
45
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
46
- when '-p'
47
- loop do
48
- begin
49
- Float(ARGV.first)
50
- pvalues << ARGV.shift.to_f
51
- rescue
52
- raise StopIteration
53
- end
54
- end
55
- when '-d'
56
- discretization = ARGV.shift.to_f
57
- end
58
- end
59
- pvalues = default_pvalues if pvalues.empty?
60
-
61
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
62
-
63
- if filename == '.stdin'
64
- ## TODO
65
- else
66
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
67
- pwm = Bioinform::PWM.new( File.read(filename) )
68
- end
69
-
70
- pwm.background(background)
71
-
72
- pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
73
- puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
74
- end
75
- rescue => err
76
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
77
- end
@@ -1,101 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby preprocess_collection.rb <folder with PWMs> [options]
4
-
5
- Options:
6
- [-p <list of P-values>]
7
- [-d <rough discretization> <precise discretization>]
8
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
9
- [-o <output file>]
10
- [--silent] - don't show current progress information during scan (by default this information's written into stderr)
11
-
12
- The tool stores preprocessed Macroape collection to the specified YAML-file.
13
-
14
- Example:
15
- ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
16
- }
17
-
18
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
19
- require 'macroape'
20
- require 'yaml'
21
-
22
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
23
- STDERR.puts help_string
24
- exit
25
- end
26
-
27
- default_pvalues = [0.0005]
28
- background = [1,1,1,1]
29
- rough_discretization = 1
30
- precise_discretization = 10
31
- output_file = 'collection.yaml'
32
-
33
- begin
34
- folder = ARGV.shift
35
- raise "No input. You'd specify folder with pat-files" unless folder
36
- raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
37
-
38
- pvalues = []
39
- silent = false
40
- until ARGV.empty?
41
- case ARGV.shift
42
- when '-b'
43
- background = ARGV.shift(4).map(&:to_f)
44
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background == background.reverse
45
- when '-p'
46
- loop do
47
- begin
48
- Float(ARGV.first)
49
- pvalues << ARGV.shift.to_f
50
- rescue
51
- raise StopIteration
52
- end
53
- end
54
- when '-d'
55
- rough_discretization, precise_discretization = ARGV.shift(2).map(&:to_f).sort
56
- when '-o'
57
- output_file = ARGV.shift
58
- when '-m'
59
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
60
- when '-md'
61
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
62
- when '--silent'
63
- silent = true
64
- end
65
- end
66
- pvalues = default_pvalues if pvalues.empty?
67
-
68
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
69
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
70
-
71
- collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
72
-
73
- current_dir = File.dirname(__FILE__)
74
- Dir.glob(File.join(folder,'*')) do |filename|
75
- STDERR.puts filename unless silent
76
- pwm = Bioinform::PWM.new(File.read(filename))
77
- pwm.name ||= File.basename(filename, File.extname(filename))
78
-
79
- # When support of onefile collections is introduced - then here should be check if name exists.
80
- # Otherwise it should skip motif and tell you about this
81
- # Also two command line options to fail on skipping or to skip silently should be included
82
-
83
- info = {rough: {}, precise: {}}
84
- pwm.background(background)
85
-
86
- pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
- info[:rough][pvalue] = threshold / rough_discretization
88
- end
89
-
90
- pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
- info[:precise][pvalue] = threshold / precise_discretization
92
- end
93
-
94
- collection.add_pwm(pwm, info)
95
- end
96
- File.open(output_file,'w') do |f|
97
- f.puts(collection.to_yaml)
98
- end
99
- rescue => err
100
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
101
- end