macroape 3.3.3 → 3.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Rakefile.rb +7 -22
- data/TODO.txt +7 -6
- data/bin/align_motifs +4 -0
- data/bin/eval_alignment +2 -1
- data/bin/eval_similarity +2 -1
- data/bin/find_pvalue +2 -1
- data/bin/find_threshold +2 -1
- data/bin/preprocess_collection +2 -1
- data/bin/scan_collection +2 -1
- data/lib/macroape/aligned_pair_intersection.rb +2 -3
- data/lib/macroape/cli/align_motifs.rb +49 -0
- data/lib/macroape/cli/eval_alignment.rb +124 -0
- data/lib/macroape/cli/eval_similarity.rb +107 -0
- data/lib/macroape/cli/find_pvalue.rb +89 -0
- data/lib/macroape/cli/find_threshold.rb +84 -0
- data/lib/macroape/cli/preprocess_collection.rb +123 -0
- data/lib/macroape/cli/scan_collection.rb +141 -0
- data/lib/macroape/cli.rb +5 -0
- data/lib/macroape/counting.rb +15 -1
- data/lib/macroape/pwm_compare.rb +21 -1
- data/lib/macroape/pwm_compare_aligned.rb +21 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/align_motifs_test.rb +12 -0
- data/test/data/KLF3_f1.pat +16 -0
- data/test/data/KLF3_f1.pcm +16 -0
- data/test/data/KLF4_f2.pcm +11 -0
- data/test/data/SP1_f1.pat +11 -11
- data/test/data/SP1_f1.pcm +12 -0
- data/test/data/SP1_f1_revcomp.pat +11 -11
- data/test/data/SP1_f1_revcomp.pcm +12 -0
- data/test/data/test_collection/SP1_f1.pat +11 -11
- data/test/data/test_collection.yaml +49 -109
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
- data/test/data/test_collection_single_file.txt +38 -0
- data/test/data/test_collection_single_file_pcm.txt +38 -0
- data/test/eval_alignment_test.rb +31 -0
- data/test/eval_similarity_test.rb +28 -13
- data/test/find_pvalue_test.rb +10 -13
- data/test/find_threshold_test.rb +10 -5
- data/test/preprocess_collection_test.rb +36 -2
- data/test/scan_collection_test.rb +9 -4
- data/test/test_helper.rb +61 -2
- metadata +38 -12
- data/lib/macroape/exec/eval_alignment.rb +0 -125
- data/lib/macroape/exec/eval_similarity.rb +0 -108
- data/lib/macroape/exec/find_pvalue.rb +0 -81
- data/lib/macroape/exec/find_threshold.rb +0 -77
- data/lib/macroape/exec/preprocess_collection.rb +0 -101
- data/lib/macroape/exec/scan_collection.rb +0 -124
- data/test/eval_alignment_similarity_test.rb +0 -20
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.3
|
4
|
+
version: 3.3.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bioinform
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.1.
|
21
|
+
version: 0.1.5
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.1.
|
29
|
+
version: 0.1.5
|
30
30
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
31
31
|
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
32
32
|
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|
@@ -35,6 +35,7 @@ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-
|
|
35
35
|
email:
|
36
36
|
- prijutme4ty@gmail.com
|
37
37
|
executables:
|
38
|
+
- align_motifs
|
38
39
|
- eval_alignment
|
39
40
|
- eval_similarity
|
40
41
|
- find_pvalue
|
@@ -51,6 +52,7 @@ files:
|
|
51
52
|
- Rakefile.rb
|
52
53
|
- TODO.txt
|
53
54
|
- benchmark/similarity_benchmark.rb
|
55
|
+
- bin/align_motifs
|
54
56
|
- bin/eval_alignment
|
55
57
|
- bin/eval_similarity
|
56
58
|
- bin/find_pvalue
|
@@ -59,32 +61,45 @@ files:
|
|
59
61
|
- bin/scan_collection
|
60
62
|
- lib/macroape.rb
|
61
63
|
- lib/macroape/aligned_pair_intersection.rb
|
64
|
+
- lib/macroape/cli.rb
|
65
|
+
- lib/macroape/cli/align_motifs.rb
|
66
|
+
- lib/macroape/cli/eval_alignment.rb
|
67
|
+
- lib/macroape/cli/eval_similarity.rb
|
68
|
+
- lib/macroape/cli/find_pvalue.rb
|
69
|
+
- lib/macroape/cli/find_threshold.rb
|
70
|
+
- lib/macroape/cli/preprocess_collection.rb
|
71
|
+
- lib/macroape/cli/scan_collection.rb
|
62
72
|
- lib/macroape/collection.rb
|
63
73
|
- lib/macroape/counting.rb
|
64
|
-
- lib/macroape/exec/eval_alignment.rb
|
65
|
-
- lib/macroape/exec/eval_similarity.rb
|
66
|
-
- lib/macroape/exec/find_pvalue.rb
|
67
|
-
- lib/macroape/exec/find_threshold.rb
|
68
|
-
- lib/macroape/exec/preprocess_collection.rb
|
69
|
-
- lib/macroape/exec/scan_collection.rb
|
70
74
|
- lib/macroape/pwm_compare.rb
|
71
75
|
- lib/macroape/pwm_compare_aligned.rb
|
72
76
|
- lib/macroape/version.rb
|
73
77
|
- macroape.gemspec
|
74
78
|
- spec/count_distribution_spec.rb
|
75
79
|
- spec/spec_helper.rb
|
80
|
+
- test/align_motifs_test.rb
|
76
81
|
- test/data/AHR_si.pat
|
82
|
+
- test/data/KLF3_f1.pat
|
83
|
+
- test/data/KLF3_f1.pcm
|
77
84
|
- test/data/KLF4_f2.pat
|
85
|
+
- test/data/KLF4_f2.pcm
|
78
86
|
- test/data/KLF4_f2_scan_results_all.txt
|
79
87
|
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
80
88
|
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
81
89
|
- test/data/SP1_f1.pat
|
90
|
+
- test/data/SP1_f1.pcm
|
82
91
|
- test/data/SP1_f1_revcomp.pat
|
92
|
+
- test/data/SP1_f1_revcomp.pcm
|
83
93
|
- test/data/test_collection.yaml
|
84
94
|
- test/data/test_collection/GABPA_f1.pat
|
85
95
|
- test/data/test_collection/KLF4_f2.pat
|
86
96
|
- test/data/test_collection/SP1_f1.pat
|
87
|
-
- test/eval_alignment_similarity_test.rb
|
97
|
+
- test/data/test_collection_pcm/GABPA_f1.pcm
|
98
|
+
- test/data/test_collection_pcm/KLF4_f2.pcm
|
99
|
+
- test/data/test_collection_pcm/SP1_f1.pcm
|
100
|
+
- test/data/test_collection_single_file.txt
|
101
|
+
- test/data/test_collection_single_file_pcm.txt
|
102
|
+
- test/eval_alignment_test.rb
|
88
103
|
- test/eval_similarity_test.rb
|
89
104
|
- test/find_pvalue_test.rb
|
90
105
|
- test/find_threshold_test.rb
|
@@ -118,18 +133,29 @@ summary: PWM comparison tool using MACROAPE approach
|
|
118
133
|
test_files:
|
119
134
|
- spec/count_distribution_spec.rb
|
120
135
|
- spec/spec_helper.rb
|
136
|
+
- test/align_motifs_test.rb
|
121
137
|
- test/data/AHR_si.pat
|
138
|
+
- test/data/KLF3_f1.pat
|
139
|
+
- test/data/KLF3_f1.pcm
|
122
140
|
- test/data/KLF4_f2.pat
|
141
|
+
- test/data/KLF4_f2.pcm
|
123
142
|
- test/data/KLF4_f2_scan_results_all.txt
|
124
143
|
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
125
144
|
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
126
145
|
- test/data/SP1_f1.pat
|
146
|
+
- test/data/SP1_f1.pcm
|
127
147
|
- test/data/SP1_f1_revcomp.pat
|
148
|
+
- test/data/SP1_f1_revcomp.pcm
|
128
149
|
- test/data/test_collection.yaml
|
129
150
|
- test/data/test_collection/GABPA_f1.pat
|
130
151
|
- test/data/test_collection/KLF4_f2.pat
|
131
152
|
- test/data/test_collection/SP1_f1.pat
|
132
|
-
- test/eval_alignment_similarity_test.rb
|
153
|
+
- test/data/test_collection_pcm/GABPA_f1.pcm
|
154
|
+
- test/data/test_collection_pcm/KLF4_f2.pcm
|
155
|
+
- test/data/test_collection_pcm/SP1_f1.pcm
|
156
|
+
- test/data/test_collection_single_file.txt
|
157
|
+
- test/data/test_collection_single_file_pcm.txt
|
158
|
+
- test/eval_alignment_test.rb
|
133
159
|
- test/eval_similarity_test.rb
|
134
160
|
- test/find_pvalue_test.rb
|
135
161
|
- test/find_threshold_test.rb
|
@@ -1,125 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby eval_alignment.rb <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]
|
4
|
-
type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
|
5
|
-
or in linux
|
6
|
-
cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_alignment.rb .stdin .stdin <shift> <orientation(direct/revcomp)> [options]
|
7
|
-
|
8
|
-
Options:
|
9
|
-
[-p <P-value>]
|
10
|
-
[-d <discretization level>]
|
11
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
12
|
-
|
13
|
-
Output format:
|
14
|
-
<jaccard similarity coefficient>
|
15
|
-
<number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the given alignment>
|
16
|
-
<aligned 1st matrix>
|
17
|
-
<aligned 2nd matrix>
|
18
|
-
<shift> <orientation>
|
19
|
-
|
20
|
-
Examples:
|
21
|
-
ruby eval_alignment.rb motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
22
|
-
or on windows
|
23
|
-
type motifs/SP1.pat | ruby eval_alignment.rb motifs/KLF4.pat .stdin 0 revcomp -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
24
|
-
or in linux
|
25
|
-
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_alignment.rb .stdin .stdin 3 direct -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
26
|
-
}
|
27
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
28
|
-
require 'macroape'
|
29
|
-
|
30
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
31
|
-
STDERR.puts help_string
|
32
|
-
exit
|
33
|
-
end
|
34
|
-
|
35
|
-
pvalue = 0.0005
|
36
|
-
discretization = 10
|
37
|
-
|
38
|
-
first_background = [1,1,1,1]
|
39
|
-
second_background = [1,1,1,1]
|
40
|
-
|
41
|
-
begin
|
42
|
-
first_file = ARGV.shift
|
43
|
-
second_file = ARGV.shift
|
44
|
-
|
45
|
-
shift = ARGV.shift
|
46
|
-
orientation = ARGV.shift
|
47
|
-
|
48
|
-
raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
|
49
|
-
raise 'You\'d specify shift' unless shift
|
50
|
-
raise 'You\'d specify orientation' unless orientation
|
51
|
-
|
52
|
-
shift = shift.to_i
|
53
|
-
orientation = orientation.to_sym
|
54
|
-
|
55
|
-
case orientation
|
56
|
-
when :direct
|
57
|
-
reverse = false
|
58
|
-
when :revcomp
|
59
|
-
reverse = true
|
60
|
-
else
|
61
|
-
raise 'Unknown orientation(direct/revcomp)'
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
until ARGV.empty?
|
66
|
-
case ARGV.shift
|
67
|
-
when '-p'
|
68
|
-
pvalue = ARGV.shift.to_f
|
69
|
-
when '-d'
|
70
|
-
discretization = ARGV.shift.to_f
|
71
|
-
when '-m'
|
72
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
73
|
-
when '-md'
|
74
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
75
|
-
when '-b'
|
76
|
-
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
77
|
-
when '-b1'
|
78
|
-
first_background = ARGV.shift(4).map(&:to_f)
|
79
|
-
when '-b2'
|
80
|
-
second_background = ARGV.shift(4).map(&:to_f)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
|
84
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
85
|
-
|
86
|
-
|
87
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
88
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
89
|
-
|
90
|
-
# if first_file == '.stdin' || second_file == '.stdin'
|
91
|
-
# r_stream, w_stream = IO.pipe
|
92
|
-
# STDIN.readlines.each{|line| w_stream.write(line)}
|
93
|
-
# w_stream.close
|
94
|
-
# end
|
95
|
-
|
96
|
-
if first_file == '.stdin'
|
97
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
98
|
-
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
99
|
-
else
|
100
|
-
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
101
|
-
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
102
|
-
end
|
103
|
-
|
104
|
-
if second_file == '.stdin'
|
105
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
106
|
-
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
107
|
-
else
|
108
|
-
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
109
|
-
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
110
|
-
end
|
111
|
-
|
112
|
-
# r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
113
|
-
|
114
|
-
cmp = Macroape::PWMCompareAligned.new(pwm_first, pwm_second, shift, orientation)
|
115
|
-
|
116
|
-
first_threshold = pwm_first.threshold(pvalue)
|
117
|
-
second_threshold = pwm_second.threshold(pvalue)
|
118
|
-
|
119
|
-
info = cmp.alignment_infos.merge( cmp.jaccard(first_threshold, second_threshold) )
|
120
|
-
|
121
|
-
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
122
|
-
|
123
|
-
rescue => err
|
124
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
125
|
-
end
|
@@ -1,108 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby eval_similarity.rb <1st matrix pat-file> <2nd matrix pat-file> [options]
|
4
|
-
or on windows
|
5
|
-
type <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
|
6
|
-
or in linux
|
7
|
-
cat <1st matrix pat-file> <2nd matrix pat-file> | ruby eval_similarity.rb .stdin .stdin [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-p <P-value>]
|
11
|
-
[-d <discretization level>]
|
12
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
13
|
-
|
14
|
-
Output has format:
|
15
|
-
<jaccard similarity coefficient>
|
16
|
-
<number of words recognized by both 1st and 2nd matrices | probability to draw a word recognized by both 1st and 2nd matrices> <length of the optimal alignment>
|
17
|
-
<optimal alignment, the 1st matrix>
|
18
|
-
<optimal alignment, the 2nd matrix>
|
19
|
-
<shift> <orientation>
|
20
|
-
|
21
|
-
Examples:
|
22
|
-
ruby eval_similarity.rb motifs/KLF4.pat motifs/SP1.pat -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
23
|
-
or on windows
|
24
|
-
type motifs/SP1.pat | ruby eval_similarity.rb motifs/KLF4.pat .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
25
|
-
or in linux
|
26
|
-
cat motifs/KLF4.pat motifs/SP1.pat | ruby eval_similarity.rb .stdin .stdin -p 0.0005 -d 100 -b 0.4 0.3 0.2 0.1
|
27
|
-
}
|
28
|
-
|
29
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
30
|
-
require 'macroape'
|
31
|
-
|
32
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
33
|
-
STDERR.puts help_string
|
34
|
-
exit
|
35
|
-
end
|
36
|
-
|
37
|
-
pvalue = 0.0005
|
38
|
-
discretization = 10
|
39
|
-
|
40
|
-
first_background = [1,1,1,1]
|
41
|
-
second_background = [1,1,1,1]
|
42
|
-
|
43
|
-
begin
|
44
|
-
first_file = ARGV.shift
|
45
|
-
second_file = ARGV.shift
|
46
|
-
raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
|
47
|
-
|
48
|
-
until ARGV.empty?
|
49
|
-
case ARGV.shift
|
50
|
-
when '-p'
|
51
|
-
pvalue = ARGV.shift.to_f
|
52
|
-
when '-d'
|
53
|
-
discretization = ARGV.shift.to_f
|
54
|
-
when '-m'
|
55
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
56
|
-
when '-md'
|
57
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
58
|
-
when '-b'
|
59
|
-
second_background = first_background = ARGV.shift(4).map(&:to_f)
|
60
|
-
when '-b1'
|
61
|
-
first_background = ARGV.shift(4).map(&:to_f)
|
62
|
-
when '-b2'
|
63
|
-
second_background = ARGV.shift(4).map(&:to_f)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background == first_background.reverse
|
67
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background == second_background.reverse
|
68
|
-
|
69
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
70
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
71
|
-
|
72
|
-
|
73
|
-
# if first_file == '.stdin' || second_file == '.stdin'
|
74
|
-
# r_stream, w_stream = IO.pipe
|
75
|
-
# STDIN.readlines.each{|line| w_stream.write(line)}
|
76
|
-
# w_stream.close
|
77
|
-
# end
|
78
|
-
|
79
|
-
if first_file == '.stdin'
|
80
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
81
|
-
# pwm_first = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(first_background).discrete(discretization)
|
82
|
-
else
|
83
|
-
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
84
|
-
pwm_first = Bioinform::PWM.new(File.read(first_file)).background(first_background).discrete(discretization)
|
85
|
-
end
|
86
|
-
|
87
|
-
if second_file == '.stdin'
|
88
|
-
# r_stream, w_stream, extracted_pwm = extract_pwm(r_stream, w_stream)
|
89
|
-
# pwm_second = Macroape::SingleMatrix.load_from_line_array(extracted_pwm).with_background(second_background).discrete(discretization)
|
90
|
-
else
|
91
|
-
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
92
|
-
pwm_second = Bioinform::PWM.new(File.read(second_file)).background(second_background).discrete(discretization)
|
93
|
-
end
|
94
|
-
|
95
|
-
r_stream.close if first_file == '.stdin' || second_file == '.stdin'
|
96
|
-
|
97
|
-
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
|
98
|
-
|
99
|
-
first_threshold = pwm_first.threshold(pvalue)
|
100
|
-
second_threshold = pwm_second.threshold(pvalue)
|
101
|
-
|
102
|
-
info = cmp.jaccard(first_threshold, second_threshold)
|
103
|
-
|
104
|
-
puts "#{info[:similarity]}\n#{info[:recognized_by_both]}\t#{info[:alignment_length]}\n#{info[:text]}\n#{info[:shift]}\t#{info[:orientation]}"
|
105
|
-
|
106
|
-
rescue => err
|
107
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
108
|
-
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby find_pvalue.rb <pat-file> <threshold list> [options]
|
4
|
-
or in linux
|
5
|
-
cat <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
|
6
|
-
or on windows
|
7
|
-
type <pat-file> | ruby find_pvalue.rb .stdin <threshold> [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-d <discretization level>]
|
11
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
12
|
-
|
13
|
-
Output format:
|
14
|
-
threshold_1 count_1 pvalue_1
|
15
|
-
threshold_2 count_2 pvalue_2
|
16
|
-
threshold_3 count_3 pvalue_3
|
17
|
-
The results are printed out in the same order as in the given threshold list.
|
18
|
-
|
19
|
-
Examples:
|
20
|
-
ruby find_pvalue.rb motifs/KLF4.pat 7.32 -d 1000 -b 0.2 0.3 0.2 0.3
|
21
|
-
or on windows
|
22
|
-
type motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
|
23
|
-
or in linux
|
24
|
-
cat motifs/KLF4.pat | ruby find_pvalue.rb .stdin 7.32 4.31 5.42
|
25
|
-
}
|
26
|
-
|
27
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
28
|
-
require 'macroape'
|
29
|
-
|
30
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
31
|
-
STDERR.puts help_string
|
32
|
-
exit
|
33
|
-
end
|
34
|
-
|
35
|
-
discretization = 10000
|
36
|
-
background = [1,1,1,1]
|
37
|
-
thresholds = []
|
38
|
-
begin
|
39
|
-
filename = ARGV.shift
|
40
|
-
|
41
|
-
loop do
|
42
|
-
begin
|
43
|
-
Float(ARGV.first)
|
44
|
-
thresholds << ARGV.shift.to_f
|
45
|
-
rescue
|
46
|
-
raise StopIteration
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
raise "No input. You'd specify input source: filename or .stdin" unless filename
|
51
|
-
raise 'You should specify at least one threshold' if thresholds.empty?
|
52
|
-
|
53
|
-
until ARGV.empty?
|
54
|
-
case ARGV.shift
|
55
|
-
when '-b'
|
56
|
-
background = ARGV.shift(4).map(&:to_f)
|
57
|
-
when '-d'
|
58
|
-
discretization = ARGV.shift.to_f
|
59
|
-
when '-m'
|
60
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
61
|
-
end
|
62
|
-
end
|
63
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
64
|
-
|
65
|
-
|
66
|
-
if filename == '.stdin'
|
67
|
-
# TODO
|
68
|
-
else
|
69
|
-
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
70
|
-
pwm = Bioinform::PWM.new( File.read(filename) )
|
71
|
-
end
|
72
|
-
pwm.background(background)
|
73
|
-
|
74
|
-
counts = pwm.discrete(discretization).counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
75
|
-
pvalues = counts.map{|count| count.to_f / pwm.vocabulary_volume}
|
76
|
-
pvalues.zip(thresholds,counts).each{|pvalue,threshold,count|
|
77
|
-
puts "#{threshold}\t#{count}\t#{pvalue}"
|
78
|
-
}
|
79
|
-
rescue => err
|
80
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
81
|
-
end
|
@@ -1,77 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format::
|
3
|
-
ruby find_threshold.rb <pat-file> [options]
|
4
|
-
or in linux
|
5
|
-
cat <pat-file> | ruby find_threshold.rb .stdin [options]
|
6
|
-
or on windows
|
7
|
-
type <pat-file> | ruby find_threshold.rb .stdin [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-p <list of P-values>]
|
11
|
-
[-d <discretization level>]
|
12
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
13
|
-
|
14
|
-
Output format:
|
15
|
-
requested_pvalue_1 threshold_1 achieved_pvalue_1
|
16
|
-
requested_pvalue_2 threshold_2 achieved_pvalue_2
|
17
|
-
|
18
|
-
|
19
|
-
Example:
|
20
|
-
ruby find_threshold.rb motifs/KLF4.pat -p 0.001 0.0001 0.0005 -d 1000 -b 0.4 0.3 0.2 0.1
|
21
|
-
}
|
22
|
-
|
23
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
24
|
-
require 'macroape'
|
25
|
-
|
26
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
27
|
-
STDERR.puts help_string
|
28
|
-
exit
|
29
|
-
end
|
30
|
-
|
31
|
-
background = [1,1,1,1]
|
32
|
-
default_pvalues = [0.0005]
|
33
|
-
discretization = 10000
|
34
|
-
|
35
|
-
begin
|
36
|
-
filename = ARGV.shift
|
37
|
-
raise "No input. You'd specify input source: filename or .stdin" unless filename
|
38
|
-
|
39
|
-
pvalues = []
|
40
|
-
until ARGV.empty?
|
41
|
-
case ARGV.shift
|
42
|
-
when '-b'
|
43
|
-
background = ARGV.shift(4).map(&:to_f)
|
44
|
-
when '-m'
|
45
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
46
|
-
when '-p'
|
47
|
-
loop do
|
48
|
-
begin
|
49
|
-
Float(ARGV.first)
|
50
|
-
pvalues << ARGV.shift.to_f
|
51
|
-
rescue
|
52
|
-
raise StopIteration
|
53
|
-
end
|
54
|
-
end
|
55
|
-
when '-d'
|
56
|
-
discretization = ARGV.shift.to_f
|
57
|
-
end
|
58
|
-
end
|
59
|
-
pvalues = default_pvalues if pvalues.empty?
|
60
|
-
|
61
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
62
|
-
|
63
|
-
if filename == '.stdin'
|
64
|
-
## TODO
|
65
|
-
else
|
66
|
-
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
67
|
-
pwm = Bioinform::PWM.new( File.read(filename) )
|
68
|
-
end
|
69
|
-
|
70
|
-
pwm.background(background)
|
71
|
-
|
72
|
-
pwm.discrete(discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
73
|
-
puts "#{pvalue}\t#{threshold / discretization}\t#{real_pvalue}"
|
74
|
-
end
|
75
|
-
rescue => err
|
76
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
77
|
-
end
|
@@ -1,101 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby preprocess_collection.rb <folder with PWMs> [options]
|
4
|
-
|
5
|
-
Options:
|
6
|
-
[-p <list of P-values>]
|
7
|
-
[-d <rough discretization> <precise discretization>]
|
8
|
-
[-b <background probabilities, ACGT - 4 numbers, space-delimited, sum should be equal to 1>]
|
9
|
-
[-o <output file>]
|
10
|
-
[--silent] - don't show current progress information during scan (by default this information's written into stderr)
|
11
|
-
|
12
|
-
The tool stores preprocessed Macroape collection to the specified YAML-file.
|
13
|
-
|
14
|
-
Example:
|
15
|
-
ruby preprocess_collection.rb ./motifs -p 0.001 0.0005 0.0001 -d 1 10 -b 0.2 0.3 0.2 0.3 -o collection.yaml
|
16
|
-
}
|
17
|
-
|
18
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
19
|
-
require 'macroape'
|
20
|
-
require 'yaml'
|
21
|
-
|
22
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
23
|
-
STDERR.puts help_string
|
24
|
-
exit
|
25
|
-
end
|
26
|
-
|
27
|
-
default_pvalues = [0.0005]
|
28
|
-
background = [1,1,1,1]
|
29
|
-
rough_discretization = 1
|
30
|
-
precise_discretization = 10
|
31
|
-
output_file = 'collection.yaml'
|
32
|
-
|
33
|
-
begin
|
34
|
-
folder = ARGV.shift
|
35
|
-
raise "No input. You'd specify folder with pat-files" unless folder
|
36
|
-
raise "Error! Folder #{folder} doesn't exist" unless Dir.exist?(folder)
|
37
|
-
|
38
|
-
pvalues = []
|
39
|
-
silent = false
|
40
|
-
until ARGV.empty?
|
41
|
-
case ARGV.shift
|
42
|
-
when '-b'
|
43
|
-
background = ARGV.shift(4).map(&:to_f)
|
44
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background == background.reverse
|
45
|
-
when '-p'
|
46
|
-
loop do
|
47
|
-
begin
|
48
|
-
Float(ARGV.first)
|
49
|
-
pvalues << ARGV.shift.to_f
|
50
|
-
rescue
|
51
|
-
raise StopIteration
|
52
|
-
end
|
53
|
-
end
|
54
|
-
when '-d'
|
55
|
-
rough_discretization, precise_discretization = ARGV.shift(2).map(&:to_f).sort
|
56
|
-
when '-o'
|
57
|
-
output_file = ARGV.shift
|
58
|
-
when '-m'
|
59
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
60
|
-
when '-md'
|
61
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
62
|
-
when '--silent'
|
63
|
-
silent = true
|
64
|
-
end
|
65
|
-
end
|
66
|
-
pvalues = default_pvalues if pvalues.empty?
|
67
|
-
|
68
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
69
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
70
|
-
|
71
|
-
collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
|
72
|
-
|
73
|
-
current_dir = File.dirname(__FILE__)
|
74
|
-
Dir.glob(File.join(folder,'*')) do |filename|
|
75
|
-
STDERR.puts filename unless silent
|
76
|
-
pwm = Bioinform::PWM.new(File.read(filename))
|
77
|
-
pwm.name ||= File.basename(filename, File.extname(filename))
|
78
|
-
|
79
|
-
# When support of onefile collections is introduced - then here should be check if name exists.
|
80
|
-
# Otherwise it should skip motif and tell you about this
|
81
|
-
# Also two command line options to fail on skipping or to skip silently should be included
|
82
|
-
|
83
|
-
info = {rough: {}, precise: {}}
|
84
|
-
pwm.background(background)
|
85
|
-
|
86
|
-
pwm.discrete(rough_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
87
|
-
info[:rough][pvalue] = threshold / rough_discretization
|
88
|
-
end
|
89
|
-
|
90
|
-
pwm.discrete(precise_discretization).thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
91
|
-
info[:precise][pvalue] = threshold / precise_discretization
|
92
|
-
end
|
93
|
-
|
94
|
-
collection.add_pwm(pwm, info)
|
95
|
-
end
|
96
|
-
File.open(output_file,'w') do |f|
|
97
|
-
f.puts(collection.to_yaml)
|
98
|
-
end
|
99
|
-
rescue => err
|
100
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
101
|
-
end
|