macroape 3.3.3 → 3.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Rakefile.rb +7 -22
- data/TODO.txt +7 -6
- data/bin/align_motifs +4 -0
- data/bin/eval_alignment +2 -1
- data/bin/eval_similarity +2 -1
- data/bin/find_pvalue +2 -1
- data/bin/find_threshold +2 -1
- data/bin/preprocess_collection +2 -1
- data/bin/scan_collection +2 -1
- data/lib/macroape/aligned_pair_intersection.rb +2 -3
- data/lib/macroape/cli/align_motifs.rb +49 -0
- data/lib/macroape/cli/eval_alignment.rb +124 -0
- data/lib/macroape/cli/eval_similarity.rb +107 -0
- data/lib/macroape/cli/find_pvalue.rb +89 -0
- data/lib/macroape/cli/find_threshold.rb +84 -0
- data/lib/macroape/cli/preprocess_collection.rb +123 -0
- data/lib/macroape/cli/scan_collection.rb +141 -0
- data/lib/macroape/cli.rb +5 -0
- data/lib/macroape/counting.rb +15 -1
- data/lib/macroape/pwm_compare.rb +21 -1
- data/lib/macroape/pwm_compare_aligned.rb +21 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/align_motifs_test.rb +12 -0
- data/test/data/KLF3_f1.pat +16 -0
- data/test/data/KLF3_f1.pcm +16 -0
- data/test/data/KLF4_f2.pcm +11 -0
- data/test/data/SP1_f1.pat +11 -11
- data/test/data/SP1_f1.pcm +12 -0
- data/test/data/SP1_f1_revcomp.pat +11 -11
- data/test/data/SP1_f1_revcomp.pcm +12 -0
- data/test/data/test_collection/SP1_f1.pat +11 -11
- data/test/data/test_collection.yaml +49 -109
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
- data/test/data/test_collection_single_file.txt +38 -0
- data/test/data/test_collection_single_file_pcm.txt +38 -0
- data/test/eval_alignment_test.rb +31 -0
- data/test/eval_similarity_test.rb +28 -13
- data/test/find_pvalue_test.rb +10 -13
- data/test/find_threshold_test.rb +10 -5
- data/test/preprocess_collection_test.rb +36 -2
- data/test/scan_collection_test.rb +9 -4
- data/test/test_helper.rb +61 -2
- metadata +38 -12
- data/lib/macroape/exec/eval_alignment.rb +0 -125
- data/lib/macroape/exec/eval_similarity.rb +0 -108
- data/lib/macroape/exec/find_pvalue.rb +0 -81
- data/lib/macroape/exec/find_threshold.rb +0 -77
- data/lib/macroape/exec/preprocess_collection.rb +0 -101
- data/lib/macroape/exec/scan_collection.rb +0 -124
- data/test/eval_alignment_similarity_test.rb +0 -20
@@ -1,124 +0,0 @@
|
|
1
|
-
help_string = %q{
|
2
|
-
Command-line format:
|
3
|
-
ruby scan_collection.rb <pat-file> <collection> [options]
|
4
|
-
or in linux
|
5
|
-
cat <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
|
6
|
-
or on windows
|
7
|
-
type <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
|
8
|
-
|
9
|
-
Options:
|
10
|
-
[-p <P-value>]
|
11
|
-
[-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
|
12
|
-
[--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.01' if level is not set
|
13
|
-
[--silent] - don't show current progress information during scan (by default this information's written into stderr)
|
14
|
-
|
15
|
-
Output format:
|
16
|
-
<name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
|
17
|
-
Attention! Name can contain whitespace characters.
|
18
|
-
Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
|
19
|
-
|
20
|
-
Example:
|
21
|
-
ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
|
22
|
-
or in linux
|
23
|
-
cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
|
24
|
-
}
|
25
|
-
|
26
|
-
$:.unshift File.join(File.dirname(__FILE__),'./../../')
|
27
|
-
require 'macroape'
|
28
|
-
require 'yaml'
|
29
|
-
|
30
|
-
if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
|
31
|
-
STDERR.puts help_string
|
32
|
-
exit
|
33
|
-
end
|
34
|
-
|
35
|
-
begin
|
36
|
-
filename = ARGV.shift
|
37
|
-
collection_file = ARGV.shift
|
38
|
-
raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
|
39
|
-
raise "No input. You'd specify input file with collection" unless collection_file
|
40
|
-
raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
|
41
|
-
|
42
|
-
pvalue = 0.0005
|
43
|
-
cutoff = 0.05 # minimal similarity to output
|
44
|
-
collection = YAML.load_file(collection_file)
|
45
|
-
background_query = collection.background
|
46
|
-
|
47
|
-
silent = false
|
48
|
-
precision_mode = :rough
|
49
|
-
until ARGV.empty?
|
50
|
-
case ARGV.shift
|
51
|
-
when '-bq'
|
52
|
-
background_query = ARGV.shift(4).map(&:to_f)
|
53
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background_query == background_query.reverse
|
54
|
-
when '-p'
|
55
|
-
pvalue = ARGV.shift.to_f
|
56
|
-
when '-m'
|
57
|
-
Macroape::MaxHashSizeSingle = ARGV.shift.to_f
|
58
|
-
when '-md'
|
59
|
-
Macroape::MaxHashSizeDouble = ARGV.shift.to_f
|
60
|
-
when '-c'
|
61
|
-
cutoff = ARGV.shift.to_f
|
62
|
-
when '--all'
|
63
|
-
cutoff = 0.0
|
64
|
-
when '--silent'
|
65
|
-
silent = true
|
66
|
-
when '--precise'
|
67
|
-
precision_mode = :precise
|
68
|
-
begin
|
69
|
-
Float(ARGV.first)
|
70
|
-
minimal_similarity = ARGV.shift.to_f
|
71
|
-
rescue
|
72
|
-
minimal_similarity = 0.05
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
|
77
|
-
Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
|
78
|
-
|
79
|
-
raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
|
80
|
-
|
81
|
-
if filename == '.stdin'
|
82
|
-
# query_pwm = Macroape::SingleMatrix.load_from_stdin(STDIN)
|
83
|
-
else
|
84
|
-
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
85
|
-
query_pwm = Bioinform::PWM.new(File.read(filename))
|
86
|
-
end
|
87
|
-
|
88
|
-
|
89
|
-
query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
|
90
|
-
query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)
|
91
|
-
|
92
|
-
threshold = query_pwm_rough.threshold(pvalue)
|
93
|
-
threshold_precise = query_pwm_precise.threshold(pvalue)
|
94
|
-
|
95
|
-
similarities = {}
|
96
|
-
precision_file_mode = {}
|
97
|
-
|
98
|
-
collection.pwms.each_key do |name|
|
99
|
-
pwm = collection.pwms[name]
|
100
|
-
pwm_info = collection.infos[name]
|
101
|
-
STDERR.puts pwm.name unless silent
|
102
|
-
cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
|
103
|
-
info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
|
104
|
-
precision_file_mode[name] = :rough
|
105
|
-
|
106
|
-
if precision_mode == :precise and info[:similarity] >= minimal_similarity
|
107
|
-
cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
|
108
|
-
info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
|
109
|
-
precision_file_mode[name] = :precise
|
110
|
-
end
|
111
|
-
similarities[name] = info
|
112
|
-
end
|
113
|
-
|
114
|
-
puts "#pwm\tsimilarity\tshift\toverlap\torientation"
|
115
|
-
similarities.sort_by do |name, info|
|
116
|
-
info[:similarity]
|
117
|
-
end.reverse.each do |name, info|
|
118
|
-
precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
|
119
|
-
puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
|
120
|
-
end
|
121
|
-
|
122
|
-
rescue => err
|
123
|
-
STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
|
124
|
-
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
require 'test_helper'
|
2
|
-
|
3
|
-
puts "\n\neval_alignment test:"
|
4
|
-
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
|
5
|
-
def test_process_at_optimal_alignment
|
6
|
-
IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')){|f|
|
7
|
-
assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
|
8
|
-
}
|
9
|
-
end
|
10
|
-
def test_process_not_optimal_alignment
|
11
|
-
IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')){|f|
|
12
|
-
assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", f.read
|
13
|
-
}
|
14
|
-
end
|
15
|
-
def test_process_at_optimal_alignment_reversed
|
16
|
-
IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')){|f|
|
17
|
-
assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", f.read
|
18
|
-
}
|
19
|
-
end
|
20
|
-
end
|