macroape 3.3.3 → 3.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.gitignore +1 -0
  2. data/Rakefile.rb +7 -22
  3. data/TODO.txt +7 -6
  4. data/bin/align_motifs +4 -0
  5. data/bin/eval_alignment +2 -1
  6. data/bin/eval_similarity +2 -1
  7. data/bin/find_pvalue +2 -1
  8. data/bin/find_threshold +2 -1
  9. data/bin/preprocess_collection +2 -1
  10. data/bin/scan_collection +2 -1
  11. data/lib/macroape/aligned_pair_intersection.rb +2 -3
  12. data/lib/macroape/cli/align_motifs.rb +49 -0
  13. data/lib/macroape/cli/eval_alignment.rb +124 -0
  14. data/lib/macroape/cli/eval_similarity.rb +107 -0
  15. data/lib/macroape/cli/find_pvalue.rb +89 -0
  16. data/lib/macroape/cli/find_threshold.rb +84 -0
  17. data/lib/macroape/cli/preprocess_collection.rb +123 -0
  18. data/lib/macroape/cli/scan_collection.rb +141 -0
  19. data/lib/macroape/cli.rb +5 -0
  20. data/lib/macroape/counting.rb +15 -1
  21. data/lib/macroape/pwm_compare.rb +21 -1
  22. data/lib/macroape/pwm_compare_aligned.rb +21 -0
  23. data/lib/macroape/version.rb +1 -1
  24. data/macroape.gemspec +1 -1
  25. data/test/align_motifs_test.rb +12 -0
  26. data/test/data/KLF3_f1.pat +16 -0
  27. data/test/data/KLF3_f1.pcm +16 -0
  28. data/test/data/KLF4_f2.pcm +11 -0
  29. data/test/data/SP1_f1.pat +11 -11
  30. data/test/data/SP1_f1.pcm +12 -0
  31. data/test/data/SP1_f1_revcomp.pat +11 -11
  32. data/test/data/SP1_f1_revcomp.pcm +12 -0
  33. data/test/data/test_collection/SP1_f1.pat +11 -11
  34. data/test/data/test_collection.yaml +49 -109
  35. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
  36. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
  37. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
  38. data/test/data/test_collection_single_file.txt +38 -0
  39. data/test/data/test_collection_single_file_pcm.txt +38 -0
  40. data/test/eval_alignment_test.rb +31 -0
  41. data/test/eval_similarity_test.rb +28 -13
  42. data/test/find_pvalue_test.rb +10 -13
  43. data/test/find_threshold_test.rb +10 -5
  44. data/test/preprocess_collection_test.rb +36 -2
  45. data/test/scan_collection_test.rb +9 -4
  46. data/test/test_helper.rb +61 -2
  47. metadata +38 -12
  48. data/lib/macroape/exec/eval_alignment.rb +0 -125
  49. data/lib/macroape/exec/eval_similarity.rb +0 -108
  50. data/lib/macroape/exec/find_pvalue.rb +0 -81
  51. data/lib/macroape/exec/find_threshold.rb +0 -77
  52. data/lib/macroape/exec/preprocess_collection.rb +0 -101
  53. data/lib/macroape/exec/scan_collection.rb +0 -124
  54. data/test/eval_alignment_similarity_test.rb +0 -20
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.3.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-19 00:00:00.000000000 Z
12
+ date: 2012-09-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bioinform
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.1.2
21
+ version: 0.1.5
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.1.2
29
+ version: 0.1.5
30
30
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
31
31
  Estimation. It's a bioinformatic tool for evaluating similarity measure and best
32
32
  alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -35,6 +35,7 @@ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-
35
35
  email:
36
36
  - prijutme4ty@gmail.com
37
37
  executables:
38
+ - align_motifs
38
39
  - eval_alignment
39
40
  - eval_similarity
40
41
  - find_pvalue
@@ -51,6 +52,7 @@ files:
51
52
  - Rakefile.rb
52
53
  - TODO.txt
53
54
  - benchmark/similarity_benchmark.rb
55
+ - bin/align_motifs
54
56
  - bin/eval_alignment
55
57
  - bin/eval_similarity
56
58
  - bin/find_pvalue
@@ -59,32 +61,45 @@ files:
59
61
  - bin/scan_collection
60
62
  - lib/macroape.rb
61
63
  - lib/macroape/aligned_pair_intersection.rb
64
+ - lib/macroape/cli.rb
65
+ - lib/macroape/cli/align_motifs.rb
66
+ - lib/macroape/cli/eval_alignment.rb
67
+ - lib/macroape/cli/eval_similarity.rb
68
+ - lib/macroape/cli/find_pvalue.rb
69
+ - lib/macroape/cli/find_threshold.rb
70
+ - lib/macroape/cli/preprocess_collection.rb
71
+ - lib/macroape/cli/scan_collection.rb
62
72
  - lib/macroape/collection.rb
63
73
  - lib/macroape/counting.rb
64
- - lib/macroape/exec/eval_alignment.rb
65
- - lib/macroape/exec/eval_similarity.rb
66
- - lib/macroape/exec/find_pvalue.rb
67
- - lib/macroape/exec/find_threshold.rb
68
- - lib/macroape/exec/preprocess_collection.rb
69
- - lib/macroape/exec/scan_collection.rb
70
74
  - lib/macroape/pwm_compare.rb
71
75
  - lib/macroape/pwm_compare_aligned.rb
72
76
  - lib/macroape/version.rb
73
77
  - macroape.gemspec
74
78
  - spec/count_distribution_spec.rb
75
79
  - spec/spec_helper.rb
80
+ - test/align_motifs_test.rb
76
81
  - test/data/AHR_si.pat
82
+ - test/data/KLF3_f1.pat
83
+ - test/data/KLF3_f1.pcm
77
84
  - test/data/KLF4_f2.pat
85
+ - test/data/KLF4_f2.pcm
78
86
  - test/data/KLF4_f2_scan_results_all.txt
79
87
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
80
88
  - test/data/KLF4_f2_scan_results_precise_mode.txt
81
89
  - test/data/SP1_f1.pat
90
+ - test/data/SP1_f1.pcm
82
91
  - test/data/SP1_f1_revcomp.pat
92
+ - test/data/SP1_f1_revcomp.pcm
83
93
  - test/data/test_collection.yaml
84
94
  - test/data/test_collection/GABPA_f1.pat
85
95
  - test/data/test_collection/KLF4_f2.pat
86
96
  - test/data/test_collection/SP1_f1.pat
87
- - test/eval_alignment_similarity_test.rb
97
+ - test/data/test_collection_pcm/GABPA_f1.pcm
98
+ - test/data/test_collection_pcm/KLF4_f2.pcm
99
+ - test/data/test_collection_pcm/SP1_f1.pcm
100
+ - test/data/test_collection_single_file.txt
101
+ - test/data/test_collection_single_file_pcm.txt
102
+ - test/eval_alignment_test.rb
88
103
  - test/eval_similarity_test.rb
89
104
  - test/find_pvalue_test.rb
90
105
  - test/find_threshold_test.rb
@@ -118,18 +133,29 @@ summary: PWM comparison tool using MACROAPE approach
118
133
  test_files:
119
134
  - spec/count_distribution_spec.rb
120
135
  - spec/spec_helper.rb
136
+ - test/align_motifs_test.rb
121
137
  - test/data/AHR_si.pat
138
+ - test/data/KLF3_f1.pat
139
+ - test/data/KLF3_f1.pcm
122
140
  - test/data/KLF4_f2.pat
141
+ - test/data/KLF4_f2.pcm
123
142
  - test/data/KLF4_f2_scan_results_all.txt
124
143
  - test/data/KLF4_f2_scan_results_default_cutoff.txt
125
144
  - test/data/KLF4_f2_scan_results_precise_mode.txt
126
145
  - test/data/SP1_f1.pat
146
+ - test/data/SP1_f1.pcm
127
147
  - test/data/SP1_f1_revcomp.pat
148
+ - test/data/SP1_f1_revcomp.pcm
128
149
  - test/data/test_collection.yaml
129
150
  - test/data/test_collection/GABPA_f1.pat
130
151
  - test/data/test_collection/KLF4_f2.pat
131
152
  - test/data/test_collection/SP1_f1.pat
132
- - test/eval_alignment_similarity_test.rb
153
+ - test/data/test_collection_pcm/GABPA_f1.pcm
154
+ - test/data/test_collection_pcm/KLF4_f2.pcm
155
+ - test/data/test_collection_pcm/SP1_f1.pcm
156
+ - test/data/test_collection_single_file.txt
157
+ - test/data/test_collection_single_file_pcm.txt
158
+ - test/eval_alignment_test.rb
133
159
  - test/eval_similarity_test.rb
134
160
  - test/find_pvalue_test.rb
135
161
  - test/find_threshold_test.rb
@@ -1,125 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby eval_alignment.rb <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
4
- type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
5
- or in linux
6
- cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
7
-
8
- Options:
9
- [-p <P-value>]
10
- [-d <discretization level>]
11
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
12
-
13
- Output format:
14
- <jaccard similarity coefficient>
15
- <number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the given alignment>
16
- <aligned 1st matrix>
17
- <aligned 2nd matrix>
18
- <shift> <orientation>
19
-
20
- Examples:
21
- ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
22
- or on windows
23
- type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
24
- or in linux
25
- cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
26
- }
27
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
28
- require 'macroape'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- pvalue = 0.0005
36
- discretization = 10
37
-
38
- first_background = [1,1,1,1]
39
- second_background = [1,1,1,1]
40
-
41
- begin
42
- first_file = ARGV.shift
43
- second_file = ARGV.shift
44
-
45
- shift = ARGV.shift
46
- orientation = ARGV.shift
47
-
48
- raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
49
- raise 'You\'d specify shift' unless shift
50
- raise 'You\'d specify orientation' unless orientation
51
-
52
- shift = shift.to_i
53
- orientation = orientation.to_sym
54
-
55
- case orientation
56
- when :direct
57
- reverse = false
58
- when :revcomp
59
- reverse = true
60
- else
61
- raise 'Unknown orientation(direct/revcomp)'
62
- end
63
-
64
-
65
- until ARGV.empty?
66
- case ARGV.shift
67
- when '-p'
68
- pvalue = ARGV.shift.to_f
69
- when '-d'
70
- discretization = ARGV.shift.to_f
71
- when '-m'
72
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
73
- when '-md'
74
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
75
- when '-b'
76
- second_background = first_background = ARGV.shift(4).map(&:to_f)
77
- when '-b1'
78
- first_background = ARGV.shift(4).map(&:to_f)
79
- when '-b2'
80
- second_background = ARGV.shift(4).map(&:to_f)
81
- end
82
- end
83
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
84
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
85
-
86
-
87
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
88
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
89
-
90
- # if first_file == '.stdin' || second_file == '.stdin'
91
- # r_stream, w_stream = IO.pipe
92
- # STDIN.readlines.each{|line| w_stream.write(line)}
93
- # w_stream.close
94
- # end
95
-
96
- if first_file == '.stdin'
97
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
98
- # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
99
- else
100
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
101
- pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
102
- end
103
-
104
- if second_file == '.stdin'
105
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
106
- # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
107
- else
108
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
109
- pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
110
- end
111
-
112
- # r_stream.close if first_file == '.stdin' || second_file == '.stdin'
113
-
114
- cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
115
-
116
- first_threshold = pwm_first.threshold(pvalue)
117
- second_threshold = pwm_second.threshold(pvalue)
118
-
119
- info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
120
-
121
- puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
122
-
123
- rescue => err
124
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
125
- end
@@ -1,108 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby eval_similarity.rb <1st matrix pat-file> <2nd matrix pat-file> [options]
4
- or on windows
5
- type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
6
- or in linux
7
- cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
8
-
9
- Options:
10
- [-p <P-value>]
11
- [-d <discretization level>]
12
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
13
-
14
- Output has format:
15
- <jaccard similarity coefficient>
16
- <number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the optimal alignment>
17
- <optimal alignment, the 1st matrix>
18
- <optimal alignment, the 2nd matrix>
19
- <shift> <orientation>
20
-
21
- Examples:
22
- ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
23
- or on windows
24
- type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
25
- or in linux
26
- cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
27
- }
28
-
29
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
30
- require 'macroape'
31
-
32
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
33
- STDERR.puts help_string
34
- exit
35
- end
36
-
37
- pvalue = 0.0005
38
- discretization = 10
39
-
40
- first_background = [1,1,1,1]
41
- second_background = [1,1,1,1]
42
-
43
- begin
44
- first_file = ARGV.shift
45
- second_file = ARGV.shift
46
- raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
47
-
48
- until ARGV.empty?
49
- case ARGV.shift
50
- when '-p'
51
- pvalue = ARGV.shift.to_f
52
- when '-d'
53
- discretization = ARGV.shift.to_f
54
- when '-m'
55
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
56
- when '-md'
57
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
58
- when '-b'
59
- second_background = first_background = ARGV.shift(4).map(&:to_f)
60
- when '-b1'
61
- first_background = ARGV.shift(4).map(&:to_f)
62
- when '-b2'
63
- second_background = ARGV.shift(4).map(&:to_f)
64
- end
65
- end
66
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
67
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
68
-
69
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
70
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
71
-
72
-
73
- # if first_file == '.stdin' || second_file == '.stdin'
74
- # r_stream, w_stream = IO.pipe
75
- # STDIN.readlines.each{|line| w_stream.write(line)}
76
- # w_stream.close
77
- # end
78
-
79
- if first_file == '.stdin'
80
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
81
- # pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
82
- else
83
- raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
84
- pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
85
- end
86
-
87
- if second_file == '.stdin'
88
- # r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
89
- # pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
90
- else
91
- raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
92
- pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
93
- end
94
-
95
- r_stream.close if first_file == '.stdin' || second_file == '.stdin'
96
-
97
- cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
98
-
99
- first_threshold = pwm_first.threshold(pvalue)
100
- second_threshold = pwm_second.threshold(pvalue)
101
-
102
- info = cmp.jaccard(first_threshold, second_threshold)
103
-
104
- puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
105
-
106
- rescue => err
107
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
108
- end
@@ -1,81 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby find_pvalue.rb <pat-file> <threshold list> [options]
4
- or in linux
5
- cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
6
- or on windows
7
- type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
8
-
9
- Options:
10
- [-d <discretization level>]
11
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
12
-
13
- Output format:
14
- threshold_1 count_1 pvalue_1
15
- threshold_2 count_2 pvalue_2
16
- threshold_3 count_3 pvalue_3
17
- The results are printed out in the same order as in the given threshold list.
18
-
19
- Examples:
20
- ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
21
- or on windows
22
- type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
23
- or in linux
24
- cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
25
- }
26
-
27
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
28
- require 'macroape'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- discretization = 10000
36
- background = [1,1,1,1]
37
- thresholds = []
38
- begin
39
- filename = ARGV.shift
40
-
41
- loop do
42
- begin
43
- Float(ARGV.first)
44
- thresholds << ARGV.shift.to_f
45
- rescue
46
- raise StopIteration
47
- end
48
- end
49
-
50
- raise "No input. You'd specify input source: filename or .stdin" unless filename
51
- raise 'You should specify at least one threshold' if thresholds.empty?
52
-
53
- until ARGV.empty?
54
- case ARGV.shift
55
- when '-b'
56
- background = ARGV.shift(4).map(&:to_f)
57
- when '-d'
58
- discretization = ARGV.shift.to_f
59
- when '-m'
60
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
61
- end
62
- end
63
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
64
-
65
-
66
- if filename == '.stdin'
67
- # TODO
68
- else
69
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
70
- pwm = Bioinform::PWM.new( File.read(filename) )
71
- end
72
- pwm.background(background)
73
-
74
- counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
75
- pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
76
- pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
77
- puts "#{threshold}\t#{count}\t#{pvalue}"
78
- }
79
- rescue => err
80
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
81
- end
@@ -1,77 +0,0 @@
1
- help_string = %q{
2
- Command-line format::
3
- ruby find_threshold.rb <pat-file> [options]
4
- or in linux
5
- cat <pat-file> | ruby find_threshold.rb .stdin [options]
6
- or on windows
7
- type <pat-file> | ruby find_threshold.rb .stdin [options]
8
-
9
- Options:
10
- [-p <list of P-values>]
11
- [-d <discretization level>]
12
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
13
-
14
- Output format:
15
- requested_pvalue_1 threshold_1 achieved_pvalue_1
16
- requested_pvalue_2 threshold_2 achieved_pvalue_2
17
-
18
-
19
- Example:
20
- ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
21
- }
22
-
23
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
24
- require 'macroape'
25
-
26
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
27
- STDERR.puts help_string
28
- exit
29
- end
30
-
31
- background = [1,1,1,1]
32
- default_pvalues = [0.0005]
33
- discretization = 10000
34
-
35
- begin
36
- filename = ARGV.shift
37
- raise "No input. You'd specify input source: filename or .stdin" unless filename
38
-
39
- pvalues = []
40
- until ARGV.empty?
41
- case ARGV.shift
42
- when '-b'
43
- background = ARGV.shift(4).map(&:to_f)
44
- when '-m'
45
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
46
- when '-p'
47
- loop do
48
- begin
49
- Float(ARGV.first)
50
- pvalues << ARGV.shift.to_f
51
- rescue
52
- raise StopIteration
53
- end
54
- end
55
- when '-d'
56
- discretization = ARGV.shift.to_f
57
- end
58
- end
59
- pvalues = default_pvalues if pvalues.empty?
60
-
61
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
62
-
63
- if filename == '.stdin'
64
- ## TODO
65
- else
66
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
67
- pwm = Bioinform::PWM.new( File.read(filename) )
68
- end
69
-
70
- pwm.background(background)
71
-
72
- pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
73
- puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
74
- end
75
- rescue => err
76
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
77
- end
@@ -1,101 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby preprocess_collection.rb <folder with PWMs> [options]
4
-
5
- Options:
6
- [-p <list of P-values>]
7
- [-d <rough discretization> <precise discretization>]
8
- [-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
9
- [-o <output file>]
10
- [--silent] - don't show current progress information during scan (by default this information's written into stderr)
11
-
12
- The tool stores preprocessed Macroape collection to the specified YAML-file.
13
-
14
- Example:
15
- ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
16
- }
17
-
18
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
19
- require 'macroape'
20
- require 'yaml'
21
-
22
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
23
- STDERR.puts help_string
24
- exit
25
- end
26
-
27
- default_pvalues = [0.0005]
28
- background = [1,1,1,1]
29
- rough_discretization = 1
30
- precise_discretization = 10
31
- output_file = 'collection.yaml'
32
-
33
- begin
34
- folder = ARGV.shift
35
- raise "No input. You'd specify folder with pat-files" unless folder
36
- raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
37
-
38
- pvalues = []
39
- silent = false
40
- until ARGV.empty?
41
- case ARGV.shift
42
- when '-b'
43
- background = ARGV.shift(4).map(&:to_f)
44
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background == background.reverse
45
- when '-p'
46
- loop do
47
- begin
48
- Float(ARGV.first)
49
- pvalues << ARGV.shift.to_f
50
- rescue
51
- raise StopIteration
52
- end
53
- end
54
- when '-d'
55
- rough_discretization, precise_discretization = ARGV.shift(2).map(&:to_f).sort
56
- when '-o'
57
- output_file = ARGV.shift
58
- when '-m'
59
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
60
- when '-md'
61
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
62
- when '--silent'
63
- silent = true
64
- end
65
- end
66
- pvalues = default_pvalues if pvalues.empty?
67
-
68
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
69
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
70
-
71
- collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
72
-
73
- current_dir = File.dirname(__FILE__)
74
- Dir.glob(File.join(folder,'*')) do |filename|
75
- STDERR.puts filename unless silent
76
- pwm = Bioinform::PWM.new(File.read(filename))
77
- pwm.name ||= File.basename(filename, File.extname(filename))
78
-
79
- # When support of onefile collections is introduced - then here should be check if name exists.
80
- # Otherwise it should skip motif and tell you about this
81
- # Also two command line options to fail on skipping or to skip silently should be included
82
-
83
- info = {rough: {}, precise: {}}
84
- pwm.background(background)
85
-
86
- pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
87
- info[:rough][pvalue] = threshold / rough_discretization
88
- end
89
-
90
- pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
91
- info[:precise][pvalue] = threshold / precise_discretization
92
- end
93
-
94
- collection.add_pwm(pwm, info)
95
- end
96
- File.open(output_file,'w') do |f|
97
- f.puts(collection.to_yaml)
98
- end
99
- rescue => err
100
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
101
- end