macroape 3.3.3 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +1 -0
  2. data/Rakefile.rb +7 -22
  3. data/TODO.txt +7 -6
  4. data/bin/align_motifs +4 -0
  5. data/bin/eval_alignment +2 -1
  6. data/bin/eval_similarity +2 -1
  7. data/bin/find_pvalue +2 -1
  8. data/bin/find_threshold +2 -1
  9. data/bin/preprocess_collection +2 -1
  10. data/bin/scan_collection +2 -1
  11. data/lib/macroape/aligned_pair_intersection.rb +2 -3
  12. data/lib/macroape/cli/align_motifs.rb +49 -0
  13. data/lib/macroape/cli/eval_alignment.rb +124 -0
  14. data/lib/macroape/cli/eval_similarity.rb +107 -0
  15. data/lib/macroape/cli/find_pvalue.rb +89 -0
  16. data/lib/macroape/cli/find_threshold.rb +84 -0
  17. data/lib/macroape/cli/preprocess_collection.rb +123 -0
  18. data/lib/macroape/cli/scan_collection.rb +141 -0
  19. data/lib/macroape/cli.rb +5 -0
  20. data/lib/macroape/counting.rb +15 -1
  21. data/lib/macroape/pwm_compare.rb +21 -1
  22. data/lib/macroape/pwm_compare_aligned.rb +21 -0
  23. data/lib/macroape/version.rb +1 -1
  24. data/macroape.gemspec +1 -1
  25. data/test/align_motifs_test.rb +12 -0
  26. data/test/data/KLF3_f1.pat +16 -0
  27. data/test/data/KLF3_f1.pcm +16 -0
  28. data/test/data/KLF4_f2.pcm +11 -0
  29. data/test/data/SP1_f1.pat +11 -11
  30. data/test/data/SP1_f1.pcm +12 -0
  31. data/test/data/SP1_f1_revcomp.pat +11 -11
  32. data/test/data/SP1_f1_revcomp.pcm +12 -0
  33. data/test/data/test_collection/SP1_f1.pat +11 -11
  34. data/test/data/test_collection.yaml +49 -109
  35. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
  36. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
  37. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
  38. data/test/data/test_collection_single_file.txt +38 -0
  39. data/test/data/test_collection_single_file_pcm.txt +38 -0
  40. data/test/eval_alignment_test.rb +31 -0
  41. data/test/eval_similarity_test.rb +28 -13
  42. data/test/find_pvalue_test.rb +10 -13
  43. data/test/find_threshold_test.rb +10 -5
  44. data/test/preprocess_collection_test.rb +36 -2
  45. data/test/scan_collection_test.rb +9 -4
  46. data/test/test_helper.rb +61 -2
  47. metadata +38 -12
  48. data/lib/macroape/exec/eval_alignment.rb +0 -125
  49. data/lib/macroape/exec/eval_similarity.rb +0 -108
  50. data/lib/macroape/exec/find_pvalue.rb +0 -81
  51. data/lib/macroape/exec/find_threshold.rb +0 -77
  52. data/lib/macroape/exec/preprocess_collection.rb +0 -101
  53. data/lib/macroape/exec/scan_collection.rb +0 -124
  54. data/test/eval_alignment_similarity_test.rb +0 -20
@@ -1,124 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby scan_collection.rb <pat-file> <collection> [options]
4
- or in linux
5
- cat <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
6
- or on windows
7
- type <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
8
-
9
- Options:
10
- [-p <P-value>]
11
- [-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
12
- [--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.01' if level is not set
13
- [--silent] - don't show current progress information during scan (by default this information's written into stderr)
14
-
15
- Output format:
16
- <name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
17
- Attention! Name can contain whitespace characters.
18
- Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
19
-
20
- Example:
21
- ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
22
- or in linux
23
- cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
24
- }
25
-
26
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
27
- require 'macroape'
28
- require 'yaml'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- begin
36
- filename = ARGV.shift
37
- collection_file = ARGV.shift
38
- raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
39
- raise "No input. You'd specify input file with collection" unless collection_file
40
- raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
41
-
42
- pvalue = 0.0005
43
- cutoff = 0.05 # minimal similarity to output
44
- collection = YAML.load_file(collection_file)
45
- background_query = collection.background
46
-
47
- silent = false
48
- precision_mode = :rough
49
- until ARGV.empty?
50
- case ARGV.shift
51
- when '-bq'
52
- background_query = ARGV.shift(4).map(&:to_f)
53
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background_query == background_query.reverse
54
- when '-p'
55
- pvalue = ARGV.shift.to_f
56
- when '-m'
57
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
58
- when '-md'
59
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
60
- when '-c'
61
- cutoff = ARGV.shift.to_f
62
- when '--all'
63
- cutoff = 0.0
64
- when '--silent'
65
- silent = true
66
- when '--precise'
67
- precision_mode = :precise
68
- begin
69
- Float(ARGV.first)
70
- minimal_similarity = ARGV.shift.to_f
71
- rescue
72
- minimal_similarity = 0.05
73
- end
74
- end
75
- end
76
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
77
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
78
-
79
- raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
80
-
81
- if filename == '.stdin'
82
- # query_pwm = Macroape::SingleMatrix.load_from_stdin(STDIN)
83
- else
84
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
85
- query_pwm = Bioinform::PWM.new(File.read(filename))
86
- end
87
-
88
-
89
- query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
90
- query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)
91
-
92
- threshold = query_pwm_rough.threshold(pvalue)
93
- threshold_precise = query_pwm_precise.threshold(pvalue)
94
-
95
- similarities = {}
96
- precision_file_mode = {}
97
-
98
- collection.pwms.each_key do |name|
99
- pwm = collection.pwms[name]
100
- pwm_info = collection.infos[name]
101
- STDERR.puts pwm.name unless silent
102
- cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
103
- info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
104
- precision_file_mode[name] = :rough
105
-
106
- if precision_mode == :precise and info[:similarity] >= minimal_similarity
107
- cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
108
- info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
109
- precision_file_mode[name] = :precise
110
- end
111
- similarities[name] = info
112
- end
113
-
114
- puts "#pwm\tsimilarity\tshift\toverlap\torientation"
115
- similarities.sort_by do |name, info|
116
- info[:similarity]
117
- end.reverse.each do |name, info|
118
- precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
119
- puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
120
- end
121
-
122
- rescue => err
123
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
124
- end
@@ -1,20 +0,0 @@
1
- require 'test_helper'
2
-
3
- puts "\n\neval_alignment test:"
4
- class TestEvalAlignmentSimilarity < Test::Unit::TestCase
5
- def test_process_at_optimal_alignment
6
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')){|f|
7
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
8
- }
9
- end
10
- def test_process_not_optimal_alignment
11
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')){|f|
12
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", f.read
13
- }
14
- end
15
- def test_process_at_optimal_alignment_reversed
16
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')){|f|
17
- assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", f.read
18
- }
19
- end
20
- end