macroape 3.3.3 → 3.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Rakefile.rb +7 -22
- data/TODO.txt +7 -6
- data/bin/align_motifs +4 -0
- data/bin/eval_alignment +2 -1
- data/bin/eval_similarity +2 -1
- data/bin/find_pvalue +2 -1
- data/bin/find_threshold +2 -1
- data/bin/preprocess_collection +2 -1
- data/bin/scan_collection +2 -1
- data/lib/macroape/aligned_pair_intersection.rb +2 -3
- data/lib/macroape/cli/align_motifs.rb +49 -0
- data/lib/macroape/cli/eval_alignment.rb +124 -0
- data/lib/macroape/cli/eval_similarity.rb +107 -0
- data/lib/macroape/cli/find_pvalue.rb +89 -0
- data/lib/macroape/cli/find_threshold.rb +84 -0
- data/lib/macroape/cli/preprocess_collection.rb +123 -0
- data/lib/macroape/cli/scan_collection.rb +141 -0
- data/lib/macroape/cli.rb +5 -0
- data/lib/macroape/counting.rb +15 -1
- data/lib/macroape/pwm_compare.rb +21 -1
- data/lib/macroape/pwm_compare_aligned.rb +21 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/align_motifs_test.rb +12 -0
- data/test/data/KLF3_f1.pat +16 -0
- data/test/data/KLF3_f1.pcm +16 -0
- data/test/data/KLF4_f2.pcm +11 -0
- data/test/data/SP1_f1.pat +11 -11
- data/test/data/SP1_f1.pcm +12 -0
- data/test/data/SP1_f1_revcomp.pat +11 -11
- data/test/data/SP1_f1_revcomp.pcm +12 -0
- data/test/data/test_collection/SP1_f1.pat +11 -11
- data/test/data/test_collection.yaml +49 -109
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
- data/test/data/test_collection_single_file.txt +38 -0
- data/test/data/test_collection_single_file_pcm.txt +38 -0
- data/test/eval_alignment_test.rb +31 -0
- data/test/eval_similarity_test.rb +28 -13
- data/test/find_pvalue_test.rb +10 -13
- data/test/find_threshold_test.rb +10 -5
- data/test/preprocess_collection_test.rb +36 -2
- data/test/scan_collection_test.rb +9 -4
- data/test/test_helper.rb +61 -2
- metadata +38 -12
- data/lib/macroape/exec/eval_alignment.rb +0 -125
- data/lib/macroape/exec/eval_similarity.rb +0 -108
- data/lib/macroape/exec/find_pvalue.rb +0 -81
- data/lib/macroape/exec/find_threshold.rb +0 -77
- data/lib/macroape/exec/preprocess_collection.rb +0 -101
- data/lib/macroape/exec/scan_collection.rb +0 -124
- data/test/eval_alignment_similarity_test.rb +0 -20
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.3
|
4
|
+
version: 3.3.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bioinform
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.1.
|
21
|
+
version: 0.1.5
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.1.
|
29
|
+
version: 0.1.5
|
30
30
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
31
31
|
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
32
32
|
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|
@@ -35,6 +35,7 @@ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-
|
|
35
35
|
email:
|
36
36
|
- prijutme4ty@gmail.com
|
37
37
|
executables:
|
38
|
+
- align_motifs
|
38
39
|
- eval_alignment
|
39
40
|
- eval_similarity
|
40
41
|
- find_pvalue
|
@@ -51,6 +52,7 @@ files:
|
|
51
52
|
- Rakefile.rb
|
52
53
|
- TODO.txt
|
53
54
|
- benchmark/similarity_benchmark.rb
|
55
|
+
- bin/align_motifs
|
54
56
|
- bin/eval_alignment
|
55
57
|
- bin/eval_similarity
|
56
58
|
- bin/find_pvalue
|
@@ -59,32 +61,45 @@ files:
|
|
59
61
|
- bin/scan_collection
|
60
62
|
- lib/macroape.rb
|
61
63
|
- lib/macroape/aligned_pair_intersection.rb
|
64
|
+
- lib/macroape/cli.rb
|
65
|
+
- lib/macroape/cli/align_motifs.rb
|
66
|
+
- lib/macroape/cli/eval_alignment.rb
|
67
|
+
- lib/macroape/cli/eval_similarity.rb
|
68
|
+
- lib/macroape/cli/find_pvalue.rb
|
69
|
+
- lib/macroape/cli/find_threshold.rb
|
70
|
+
- lib/macroape/cli/preprocess_collection.rb
|
71
|
+
- lib/macroape/cli/scan_collection.rb
|
62
72
|
- lib/macroape/collection.rb
|
63
73
|
- lib/macroape/counting.rb
|
64
|
-
- lib/macroape/exec/eval_alignment.rb
|
65
|
-
- lib/macroape/exec/eval_similarity.rb
|
66
|
-
- lib/macroape/exec/find_pvalue.rb
|
67
|
-
- lib/macroape/exec/find_threshold.rb
|
68
|
-
- lib/macroape/exec/preprocess_collection.rb
|
69
|
-
- lib/macroape/exec/scan_collection.rb
|
70
74
|
- lib/macroape/pwm_compare.rb
|
71
75
|
- lib/macroape/pwm_compare_aligned.rb
|
72
76
|
- lib/macroape/version.rb
|
73
77
|
- macroape.gemspec
|
74
78
|
- spec/count_distribution_spec.rb
|
75
79
|
- spec/spec_helper.rb
|
80
|
+
- test/align_motifs_test.rb
|
76
81
|
- test/data/AHR_si.pat
|
82
|
+
- test/data/KLF3_f1.pat
|
83
|
+
- test/data/KLF3_f1.pcm
|
77
84
|
- test/data/KLF4_f2.pat
|
85
|
+
- test/data/KLF4_f2.pcm
|
78
86
|
- test/data/KLF4_f2_scan_results_all.txt
|
79
87
|
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
80
88
|
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
81
89
|
- test/data/SP1_f1.pat
|
90
|
+
- test/data/SP1_f1.pcm
|
82
91
|
- test/data/SP1_f1_revcomp.pat
|
92
|
+
- test/data/SP1_f1_revcomp.pcm
|
83
93
|
- test/data/test_collection.yaml
|
84
94
|
- test/data/test_collection/GABPA_f1.pat
|
85
95
|
- test/data/test_collection/KLF4_f2.pat
|
86
96
|
- test/data/test_collection/SP1_f1.pat
|
87
|
-
- test/eval_alignment_similarity_test.rb
|
97
|
+
- test/data/test_collection_pcm/GABPA_f1.pcm
|
98
|
+
- test/data/test_collection_pcm/KLF4_f2.pcm
|
99
|
+
- test/data/test_collection_pcm/SP1_f1.pcm
|
100
|
+
- test/data/test_collection_single_file.txt
|
101
|
+
- test/data/test_collection_single_file_pcm.txt
|
102
|
+
- test/eval_alignment_test.rb
|
88
103
|
- test/eval_similarity_test.rb
|
89
104
|
- test/find_pvalue_test.rb
|
90
105
|
- test/find_threshold_test.rb
|
@@ -118,18 +133,29 @@ summary: PWM comparison tool using MACROAPE approach
|
|
118
133
|
test_files:
|
119
134
|
- spec/count_distribution_spec.rb
|
120
135
|
- spec/spec_helper.rb
|
136
|
+
- test/align_motifs_test.rb
|
121
137
|
- test/data/AHR_si.pat
|
138
|
+
- test/data/KLF3_f1.pat
|
139
|
+
- test/data/KLF3_f1.pcm
|
122
140
|
- test/data/KLF4_f2.pat
|
141
|
+
- test/data/KLF4_f2.pcm
|
123
142
|
- test/data/KLF4_f2_scan_results_all.txt
|
124
143
|
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
125
144
|
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
126
145
|
- test/data/SP1_f1.pat
|
146
|
+
- test/data/SP1_f1.pcm
|
127
147
|
- test/data/SP1_f1_revcomp.pat
|
148
|
+
- test/data/SP1_f1_revcomp.pcm
|
128
149
|
- test/data/test_collection.yaml
|
129
150
|
- test/data/test_collection/GABPA_f1.pat
|
130
151
|
- test/data/test_collection/KLF4_f2.pat
|
131
152
|
- test/data/test_collection/SP1_f1.pat
|
132
|
-
- test/eval_alignment_similarity_test.rb
|
153
|
+
- test/data/test_collection_pcm/GABPA_f1.pcm
|
154
|
+
- test/data/test_collection_pcm/KLF4_f2.pcm
|
155
|
+
- test/data/test_collection_pcm/SP1_f1.pcm
|
156
|
+
- test/data/test_collection_single_file.txt
|
157
|
+
- test/data/test_collection_single_file_pcm.txt
|
158
|
+
- test/eval_alignment_test.rb
|
133
159
|
- test/eval_similarity_test.rb
|
134
160
|
- test/find_pvalue_test.rb
|
135
161
|
- test/find_threshold_test.rb
|
@@ -1,125 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby eval_alignment.rb <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
|
4
|
-
type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
|
5
|
-
or in linux
|
6
|
-
cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
|
7
|
-
|
8
|
-
Options:
|
9
|
-
[-p <P-value>]
|
10
|
-
[-d <discretization level>]
|
11
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
12
|
-
|
13
|
-
Output format:
|
14
|
-
<jaccard similarity coefficient>
|
15
|
-
<number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the given alignment>
|
16
|
-
<aligned 1st matrix>
|
17
|
-
<aligned 2nd matrix>
|
18
|
-
<shift> <orientation>
|
19
|
-
|
20
|
-
Examples:
|
21
|
-
ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
22
|
-
or on windows
|
23
|
-
type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
24
|
-
or in linux
|
25
|
-
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
26
|
-
}
|
27
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
28
|
-
require 'macroape'
|
29
|
-
|
30
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
31
|
-
STDERR.puts help_string
|
32
|
-
exit
|
33
|
-
end
|
34
|
-
|
35
|
-
pvalue = 0.0005
|
36
|
-
discretization = 10
|
37
|
-
|
38
|
-
first_background = [1,1,1,1]
|
39
|
-
second_background = [1,1,1,1]
|
40
|
-
|
41
|
-
begin
|
42
|
-
first_file = ARGV.shift
|
43
|
-
second_file = ARGV.shift
|
44
|
-
|
45
|
-
shift = ARGV.shift
|
46
|
-
orientation = ARGV.shift
|
47
|
-
|
48
|
-
raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
|
49
|
-
raise 'You\'d specify shift' unless shift
|
50
|
-
raise 'You\'d specify orientation' unless orientation
|
51
|
-
|
52
|
-
shift = shift.to_i
|
53
|
-
orientation = orientation.to_sym
|
54
|
-
|
55
|
-
case orientation
|
56
|
-
when :direct
|
57
|
-
reverse = false
|
58
|
-
when :revcomp
|
59
|
-
reverse = true
|
60
|
-
else
|
61
|
-
raise 'Unknown orientation(direct/revcomp)'
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
until ARGV.empty?
|
66
|
-
case ARGV.shift
|
67
|
-
when '-p'
|
68
|
-
pvalue = ARGV.shift.to_f
|
69
|
-
when '-d'
|
70
|
-
discretization = ARGV.shift.to_f
|
71
|
-
when '-m'
|
72
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
73
|
-
when '-md'
|
74
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
75
|
-
when '-b'
|
76
|
-
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
77
|
-
when '-b1'
|
78
|
-
first_background = ARGV.shift(4).map(&:to_f)
|
79
|
-
when '-b2'
|
80
|
-
second_background = ARGV.shift(4).map(&:to_f)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
|
84
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
85
|
-
|
86
|
-
|
87
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
88
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
89
|
-
|
90
|
-
# if first_file == '.stdin' || second_file == '.stdin'
|
91
|
-
# r_stream, w_stream = IO.pipe
|
92
|
-
# STDIN.readlines.each{|line| w_stream.write(line)}
|
93
|
-
# w_stream.close
|
94
|
-
# end
|
95
|
-
|
96
|
-
if first_file == '.stdin'
|
97
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
98
|
-
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
99
|
-
else
|
100
|
-
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
101
|
-
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
102
|
-
end
|
103
|
-
|
104
|
-
if second_file == '.stdin'
|
105
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
106
|
-
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
107
|
-
else
|
108
|
-
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
109
|
-
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
110
|
-
end
|
111
|
-
|
112
|
-
# r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
113
|
-
|
114
|
-
cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
|
115
|
-
|
116
|
-
first_threshold = pwm_first.threshold(pvalue)
|
117
|
-
second_threshold = pwm_second.threshold(pvalue)
|
118
|
-
|
119
|
-
info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
|
120
|
-
|
121
|
-
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
122
|
-
|
123
|
-
rescue => err
|
124
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
125
|
-
end
|
@@ -1,108 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby eval_similarity.rb <1st matrix pat-file> <2nd matrix pat-file> [options]
|
4
|
-
or on windows
|
5
|
-
type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
|
6
|
-
or in linux
|
7
|
-
cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-p <P-value>]
|
11
|
-
[-d <discretization level>]
|
12
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
13
|
-
|
14
|
-
Output has format:
|
15
|
-
<jaccard similarity coefficient>
|
16
|
-
<number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the optimal alignment>
|
17
|
-
<optimal alignment, the 1st matrix>
|
18
|
-
<optimal alignment, the 2nd matrix>
|
19
|
-
<shift> <orientation>
|
20
|
-
|
21
|
-
Examples:
|
22
|
-
ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
23
|
-
or on windows
|
24
|
-
type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
25
|
-
or in linux
|
26
|
-
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
27
|
-
}
|
28
|
-
|
29
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
30
|
-
require 'macroape'
|
31
|
-
|
32
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
33
|
-
STDERR.puts help_string
|
34
|
-
exit
|
35
|
-
end
|
36
|
-
|
37
|
-
pvalue = 0.0005
|
38
|
-
discretization = 10
|
39
|
-
|
40
|
-
first_background = [1,1,1,1]
|
41
|
-
second_background = [1,1,1,1]
|
42
|
-
|
43
|
-
begin
|
44
|
-
first_file = ARGV.shift
|
45
|
-
second_file = ARGV.shift
|
46
|
-
raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
|
47
|
-
|
48
|
-
until ARGV.empty?
|
49
|
-
case ARGV.shift
|
50
|
-
when '-p'
|
51
|
-
pvalue = ARGV.shift.to_f
|
52
|
-
when '-d'
|
53
|
-
discretization = ARGV.shift.to_f
|
54
|
-
when '-m'
|
55
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
56
|
-
when '-md'
|
57
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
58
|
-
when '-b'
|
59
|
-
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
60
|
-
when '-b1'
|
61
|
-
first_background = ARGV.shift(4).map(&:to_f)
|
62
|
-
when '-b2'
|
63
|
-
second_background = ARGV.shift(4).map(&:to_f)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
|
67
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
68
|
-
|
69
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
70
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
71
|
-
|
72
|
-
|
73
|
-
# if first_file == '.stdin' || second_file == '.stdin'
|
74
|
-
# r_stream, w_stream = IO.pipe
|
75
|
-
# STDIN.readlines.each{|line| w_stream.write(line)}
|
76
|
-
# w_stream.close
|
77
|
-
# end
|
78
|
-
|
79
|
-
if first_file == '.stdin'
|
80
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
81
|
-
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
82
|
-
else
|
83
|
-
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
84
|
-
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
85
|
-
end
|
86
|
-
|
87
|
-
if second_file == '.stdin'
|
88
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
89
|
-
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
90
|
-
else
|
91
|
-
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
92
|
-
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
93
|
-
end
|
94
|
-
|
95
|
-
r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
96
|
-
|
97
|
-
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
|
98
|
-
|
99
|
-
first_threshold = pwm_first.threshold(pvalue)
|
100
|
-
second_threshold = pwm_second.threshold(pvalue)
|
101
|
-
|
102
|
-
info = cmp.jaccard(first_threshold, second_threshold)
|
103
|
-
|
104
|
-
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
105
|
-
|
106
|
-
rescue => err
|
107
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
108
|
-
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby find_pvalue.rb <pat-file> <threshold list> [options]
|
4
|
-
or in linux
|
5
|
-
cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
|
6
|
-
or on windows
|
7
|
-
type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-d <discretization level>]
|
11
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
12
|
-
|
13
|
-
Output format:
|
14
|
-
threshold_1 count_1 pvalue_1
|
15
|
-
threshold_2 count_2 pvalue_2
|
16
|
-
threshold_3 count_3 pvalue_3
|
17
|
-
The results are printed out in the same order as in the given threshold list.
|
18
|
-
|
19
|
-
Examples:
|
20
|
-
ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
|
21
|
-
or on windows
|
22
|
-
type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
|
23
|
-
or in linux
|
24
|
-
cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
|
25
|
-
}
|
26
|
-
|
27
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
28
|
-
require 'macroape'
|
29
|
-
|
30
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
31
|
-
STDERR.puts help_string
|
32
|
-
exit
|
33
|
-
end
|
34
|
-
|
35
|
-
discretization = 10000
|
36
|
-
background = [1,1,1,1]
|
37
|
-
thresholds = []
|
38
|
-
begin
|
39
|
-
filename = ARGV.shift
|
40
|
-
|
41
|
-
loop do
|
42
|
-
begin
|
43
|
-
Float(ARGV.first)
|
44
|
-
thresholds << ARGV.shift.to_f
|
45
|
-
rescue
|
46
|
-
raise StopIteration
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
raise "No input. You'd specify input source: filename or .stdin" unless filename
|
51
|
-
raise 'You should specify at least one threshold' if thresholds.empty?
|
52
|
-
|
53
|
-
until ARGV.empty?
|
54
|
-
case ARGV.shift
|
55
|
-
when '-b'
|
56
|
-
background = ARGV.shift(4).map(&:to_f)
|
57
|
-
when '-d'
|
58
|
-
discretization = ARGV.shift.to_f
|
59
|
-
when '-m'
|
60
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
61
|
-
end
|
62
|
-
end
|
63
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
64
|
-
|
65
|
-
|
66
|
-
if filename == '.stdin'
|
67
|
-
# TODO
|
68
|
-
else
|
69
|
-
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
70
|
-
pwm = Bioinform::PWM.new( File.read(filename) )
|
71
|
-
end
|
72
|
-
pwm.background(background)
|
73
|
-
|
74
|
-
counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
75
|
-
pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
|
76
|
-
pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
|
77
|
-
puts "#{threshold}\t#{count}\t#{pvalue}"
|
78
|
-
}
|
79
|
-
rescue => err
|
80
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
81
|
-
end
|
@@ -1,77 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format::
|
3
|
-
ruby find_threshold.rb <pat-file> [options]
|
4
|
-
or in linux
|
5
|
-
cat <pat-file> | ruby find_threshold.rb .stdin [options]
|
6
|
-
or on windows
|
7
|
-
type <pat-file> | ruby find_threshold.rb .stdin [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-p <list of P-values>]
|
11
|
-
[-d <discretization level>]
|
12
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
13
|
-
|
14
|
-
Output format:
|
15
|
-
requested_pvalue_1 threshold_1 achieved_pvalue_1
|
16
|
-
requested_pvalue_2 threshold_2 achieved_pvalue_2
|
17
|
-
|
18
|
-
|
19
|
-
Example:
|
20
|
-
ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
|
21
|
-
}
|
22
|
-
|
23
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
24
|
-
require 'macroape'
|
25
|
-
|
26
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
27
|
-
STDERR.puts help_string
|
28
|
-
exit
|
29
|
-
end
|
30
|
-
|
31
|
-
background = [1,1,1,1]
|
32
|
-
default_pvalues = [0.0005]
|
33
|
-
discretization = 10000
|
34
|
-
|
35
|
-
begin
|
36
|
-
filename = ARGV.shift
|
37
|
-
raise "No input. You'd specify input source: filename or .stdin" unless filename
|
38
|
-
|
39
|
-
pvalues = []
|
40
|
-
until ARGV.empty?
|
41
|
-
case ARGV.shift
|
42
|
-
when '-b'
|
43
|
-
background = ARGV.shift(4).map(&:to_f)
|
44
|
-
when '-m'
|
45
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
46
|
-
when '-p'
|
47
|
-
loop do
|
48
|
-
begin
|
49
|
-
Float(ARGV.first)
|
50
|
-
pvalues << ARGV.shift.to_f
|
51
|
-
rescue
|
52
|
-
raise StopIteration
|
53
|
-
end
|
54
|
-
end
|
55
|
-
when '-d'
|
56
|
-
discretization = ARGV.shift.to_f
|
57
|
-
end
|
58
|
-
end
|
59
|
-
pvalues = default_pvalues if pvalues.empty?
|
60
|
-
|
61
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
62
|
-
|
63
|
-
if filename == '.stdin'
|
64
|
-
## TODO
|
65
|
-
else
|
66
|
-
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
67
|
-
pwm = Bioinform::PWM.new( File.read(filename) )
|
68
|
-
end
|
69
|
-
|
70
|
-
pwm.background(background)
|
71
|
-
|
72
|
-
pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
73
|
-
puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
|
74
|
-
end
|
75
|
-
rescue => err
|
76
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
77
|
-
end
|
@@ -1,101 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby preprocess_collection.rb <folder with PWMs> [options]
|
4
|
-
|
5
|
-
Options:
|
6
|
-
[-p <list of P-values>]
|
7
|
-
[-d <rough discretization> <precise discretization>]
|
8
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
9
|
-
[-o <output file>]
|
10
|
-
[--silent] - don't show current progress information during scan (by default this information's written into stderr)
|
11
|
-
|
12
|
-
The tool stores preprocessed Macroape collection to the specified YAML-file.
|
13
|
-
|
14
|
-
Example:
|
15
|
-
ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
|
16
|
-
}
|
17
|
-
|
18
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
19
|
-
require 'macroape'
|
20
|
-
require 'yaml'
|
21
|
-
|
22
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
23
|
-
STDERR.puts help_string
|
24
|
-
exit
|
25
|
-
end
|
26
|
-
|
27
|
-
default_pvalues = [0.0005]
|
28
|
-
background = [1,1,1,1]
|
29
|
-
rough_discretization = 1
|
30
|
-
precise_discretization = 10
|
31
|
-
output_file = 'collection.yaml'
|
32
|
-
|
33
|
-
begin
|
34
|
-
folder = ARGV.shift
|
35
|
-
raise "No input. You'd specify folder with pat-files" unless folder
|
36
|
-
raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
|
37
|
-
|
38
|
-
pvalues = []
|
39
|
-
silent = false
|
40
|
-
until ARGV.empty?
|
41
|
-
case ARGV.shift
|
42
|
-
when '-b'
|
43
|
-
background = ARGV.shift(4).map(&:to_f)
|
44
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background == background.reverse
|
45
|
-
when '-p'
|
46
|
-
loop do
|
47
|
-
begin
|
48
|
-
Float(ARGV.first)
|
49
|
-
pvalues << ARGV.shift.to_f
|
50
|
-
rescue
|
51
|
-
raise StopIteration
|
52
|
-
end
|
53
|
-
end
|
54
|
-
when '-d'
|
55
|
-
rough_discretization, precise_discretization = ARGV.shift(2).map(&:to_f).sort
|
56
|
-
when '-o'
|
57
|
-
output_file = ARGV.shift
|
58
|
-
when '-m'
|
59
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
60
|
-
when '-md'
|
61
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
62
|
-
when '--silent'
|
63
|
-
silent = true
|
64
|
-
end
|
65
|
-
end
|
66
|
-
pvalues = default_pvalues if pvalues.empty?
|
67
|
-
|
68
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
69
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
70
|
-
|
71
|
-
collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
|
72
|
-
|
73
|
-
current_dir = File.dirname(__FILE__)
|
74
|
-
Dir.glob(File.join(folder,'*')) do |filename|
|
75
|
-
STDERR.puts filename unless silent
|
76
|
-
pwm = Bioinform::PWM.new(File.read(filename))
|
77
|
-
pwm.name ||= File.basename(filename, File.extname(filename))
|
78
|
-
|
79
|
-
# When support of onefile collections is introduced - then here should be check if name exists.
|
80
|
-
# Otherwise it should skip motif and tell you about this
|
81
|
-
# Also two command line options to fail on skipping or to skip silently should be included
|
82
|
-
|
83
|
-
info = {rough: {}, precise: {}}
|
84
|
-
pwm.background(background)
|
85
|
-
|
86
|
-
pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
87
|
-
info[:rough][pvalue] = threshold / rough_discretization
|
88
|
-
end
|
89
|
-
|
90
|
-
pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
91
|
-
info[:precise][pvalue] = threshold / precise_discretization
|
92
|
-
end
|
93
|
-
|
94
|
-
collection.add_pwm(pwm, info)
|
95
|
-
end
|
96
|
-
File.open(output_file,'w') do |f|
|
97
|
-
f.puts(collection.to_yaml)
|
98
|
-
end
|
99
|
-
rescue => err
|
100
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
101
|
-
end
|