macroape 3.3.3 → 3.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Rakefile.rb +7 -22
- data/TODO.txt +7 -6
- data/bin/align_motifs +4 -0
- data/bin/eval_alignment +2 -1
- data/bin/eval_similarity +2 -1
- data/bin/find_pvalue +2 -1
- data/bin/find_threshold +2 -1
- data/bin/preprocess_collection +2 -1
- data/bin/scan_collection +2 -1
- data/lib/macroape/aligned_pair_intersection.rb +2 -3
- data/lib/macroape/cli/align_motifs.rb +49 -0
- data/lib/macroape/cli/eval_alignment.rb +124 -0
- data/lib/macroape/cli/eval_similarity.rb +107 -0
- data/lib/macroape/cli/find_pvalue.rb +89 -0
- data/lib/macroape/cli/find_threshold.rb +84 -0
- data/lib/macroape/cli/preprocess_collection.rb +123 -0
- data/lib/macroape/cli/scan_collection.rb +141 -0
- data/lib/macroape/cli.rb +5 -0
- data/lib/macroape/counting.rb +15 -1
- data/lib/macroape/pwm_compare.rb +21 -1
- data/lib/macroape/pwm_compare_aligned.rb +21 -0
- data/lib/macroape/version.rb +1 -1
- data/macroape.gemspec +1 -1
- data/test/align_motifs_test.rb +12 -0
- data/test/data/KLF3_f1.pat +16 -0
- data/test/data/KLF3_f1.pcm +16 -0
- data/test/data/KLF4_f2.pcm +11 -0
- data/test/data/SP1_f1.pat +11 -11
- data/test/data/SP1_f1.pcm +12 -0
- data/test/data/SP1_f1_revcomp.pat +11 -11
- data/test/data/SP1_f1_revcomp.pcm +12 -0
- data/test/data/test_collection/SP1_f1.pat +11 -11
- data/test/data/test_collection.yaml +49 -109
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
- data/test/data/test_collection_single_file.txt +38 -0
- data/test/data/test_collection_single_file_pcm.txt +38 -0
- data/test/eval_alignment_test.rb +31 -0
- data/test/eval_similarity_test.rb +28 -13
- data/test/find_pvalue_test.rb +10 -13
- data/test/find_threshold_test.rb +10 -5
- data/test/preprocess_collection_test.rb +36 -2
- data/test/scan_collection_test.rb +9 -4
- data/test/test_helper.rb +61 -2
- metadata +38 -12
- data/lib/macroape/exec/eval_alignment.rb +0 -125
- data/lib/macroape/exec/eval_similarity.rb +0 -108
- data/lib/macroape/exec/find_pvalue.rb +0 -81
- data/lib/macroape/exec/find_threshold.rb +0 -77
- data/lib/macroape/exec/preprocess_collection.rb +0 -101
- data/lib/macroape/exec/scan_collection.rb +0 -124
- data/test/eval_alignment_similarity_test.rb +0 -20
@@ -1,124 +0,0 @@
|
|
1
|
-
# Command-line script: compares one query PWM against every motif of a
# preprocessed collection (a YAML file) and prints, most similar first, the
# motifs whose similarity reaches the cutoff.
#
# Positional arguments: <pat-file or .stdin> <collection file>.
# Besides the options listed in the help text, the parser below also accepts
# -bq <4 floats> (query background), -m and -md (hash size limits).
help_string = %q{
Command-line format:
ruby scan_collection.rb <pat-file> <collection> [options]
or in linux
cat <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
or on windows
type <pat-file> | ruby scan_collection.rb .stdin <collection> [options]

Options:
[-p <P-value>]
[-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
[--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.05' if level is not set
[--silent] - don't show current progress information during scan (by default this information's written into stderr)

Output format:
<name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
Attention! Name can contain whitespace characters.
Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.

Example:
ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
or in linux
cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
}

$:.unshift File.join(File.dirname(__FILE__),'./../../')
require 'macroape'
require 'yaml'

# Show the help text when called without arguments or with any help flag.
help_flags = %w[-h -help --help --h]
if ARGV.empty? || help_flags.any? { |flag| ARGV.include?(flag) }
  STDERR.puts help_string
  exit
end

begin
  filename = ARGV.shift
  collection_file = ARGV.shift
  raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
  raise "No input. You'd specify input file with collection" unless collection_file
  raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)

  pvalue = 0.0005
  cutoff = 0.05 # minimal similarity to output

  # NOTE(review): YAML.load_file deserializes whatever objects the file
  # describes; only pass collection files from a trusted source.
  collection = YAML.load_file(collection_file)
  background_query = collection.background

  silent = false
  precision_mode = :rough
  minimal_similarity = nil # only set (and only read) in precise mode
  until ARGV.empty?
    case ARGV.shift
    when '-bq'
      background_query = ARGV.shift(4).map(&:to_f)
      raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background_query == background_query.reverse
    when '-p'
      pvalue = ARGV.shift.to_f
    when '-m'
      Macroape::MaxHashSizeSingle = ARGV.shift.to_f
    when '-md'
      Macroape::MaxHashSizeDouble = ARGV.shift.to_f
    when '-c'
      cutoff = ARGV.shift.to_f
    when '--all'
      cutoff = 0.0
    when '--silent'
      silent = true
    when '--precise'
      precision_mode = :precise
      # The level is optional: consume the next argument only if it is a
      # number. Float raises TypeError for nil (no more arguments) and
      # ArgumentError for a non-numeric string such as the next option.
      begin
        Float(ARGV.first)
        minimal_similarity = ARGV.shift.to_f
      rescue ArgumentError, TypeError
        minimal_similarity = 0.05
      end
    end
  end
  # Defaults apply only when -m / -md did not already define the constants.
  Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
  Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble

  raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue

  # Read the query matrix text from STDIN or from the given file. The .stdin
  # branch used to assign nothing, which left query_pwm nil and made the
  # documented `.stdin` mode fail on the first method call below.
  if filename == '.stdin'
    query_input = STDIN.read
  else
    raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
    query_input = File.read(filename)
  end
  query_pwm = Bioinform::PWM.new(query_input)

  # Query matrix at both discretization levels of the collection, with the
  # matching thresholds for the requested P-value.
  query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
  query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)

  threshold = query_pwm_rough.threshold(pvalue)
  threshold_precise = query_pwm_precise.threshold(pvalue)

  similarities = {}
  precision_file_mode = {} # name => :rough or :precise (which pass produced the result)

  collection.pwms.each_key do |name|
    pwm = collection.pwms[name]
    pwm_info = collection.infos[name]
    STDERR.puts pwm.name unless silent

    # First pass: compare on the rough discretization level.
    cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
    info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
    precision_file_mode[name] = :rough

    # Second pass (precise mode only): recompute on the precise level for
    # motifs that looked similar enough on the first pass.
    if precision_mode == :precise && info[:similarity] >= minimal_similarity
      cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
      info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
      precision_file_mode[name] = :precise
    end
    similarities[name] = info
  end

  # Report in descending order of similarity; results from the second pass
  # get a trailing "\t*" marker.
  puts "#pwm\tsimilarity\tshift\toverlap\torientation"
  similarities.sort_by { |_name, info| info[:similarity] }.reverse_each do |name, info|
    precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
    puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
  end

rescue => err
  # Any failure above is reported on stderr with a short backtrace.
  STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
require 'test_helper'

puts "\n\neval_alignment test:"

# Runs the `eval_alignment` executable on the KLF4_f2 / SP1_f1 test matrices
# at a fixed shift and orientation and checks its complete standard output.
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
  def test_process_at_optimal_alignment
    expected = "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n"
    assert_equal expected, eval_alignment_output('-1 direct')
  end

  def test_process_not_optimal_alignment
    expected = "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n"
    assert_equal expected, eval_alignment_output('0 direct')
  end

  def test_process_at_optimal_alignment_reversed
    expected = "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n"
    assert_equal expected, eval_alignment_output('-1 revcomp')
  end

  private

  # Runs eval_alignment for the two test matrices with the given
  # "<shift> <orientation>" suffix and returns everything it prints.
  # Helpers.exec_cmd is defined outside this file (presumably in test_helper).
  def eval_alignment_output(alignment)
    command = Helpers.exec_cmd('eval_alignment', "test/data/KLF4_f2.pat test/data/SP1_f1.pat #{alignment}")
    IO.popen(command) do |pipe|
      pipe.read
    end
  end
end
|