macroape 3.3.3 → 3.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. data/.gitignore +1 -0
  2. data/Rakefile.rb +7 -22
  3. data/TODO.txt +7 -6
  4. data/bin/align_motifs +4 -0
  5. data/bin/eval_alignment +2 -1
  6. data/bin/eval_similarity +2 -1
  7. data/bin/find_pvalue +2 -1
  8. data/bin/find_threshold +2 -1
  9. data/bin/preprocess_collection +2 -1
  10. data/bin/scan_collection +2 -1
  11. data/lib/macroape/aligned_pair_intersection.rb +2 -3
  12. data/lib/macroape/cli/align_motifs.rb +49 -0
  13. data/lib/macroape/cli/eval_alignment.rb +124 -0
  14. data/lib/macroape/cli/eval_similarity.rb +107 -0
  15. data/lib/macroape/cli/find_pvalue.rb +89 -0
  16. data/lib/macroape/cli/find_threshold.rb +84 -0
  17. data/lib/macroape/cli/preprocess_collection.rb +123 -0
  18. data/lib/macroape/cli/scan_collection.rb +141 -0
  19. data/lib/macroape/cli.rb +5 -0
  20. data/lib/macroape/counting.rb +15 -1
  21. data/lib/macroape/pwm_compare.rb +21 -1
  22. data/lib/macroape/pwm_compare_aligned.rb +21 -0
  23. data/lib/macroape/version.rb +1 -1
  24. data/macroape.gemspec +1 -1
  25. data/test/align_motifs_test.rb +12 -0
  26. data/test/data/KLF3_f1.pat +16 -0
  27. data/test/data/KLF3_f1.pcm +16 -0
  28. data/test/data/KLF4_f2.pcm +11 -0
  29. data/test/data/SP1_f1.pat +11 -11
  30. data/test/data/SP1_f1.pcm +12 -0
  31. data/test/data/SP1_f1_revcomp.pat +11 -11
  32. data/test/data/SP1_f1_revcomp.pcm +12 -0
  33. data/test/data/test_collection/SP1_f1.pat +11 -11
  34. data/test/data/test_collection.yaml +49 -109
  35. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -0
  36. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -0
  37. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -0
  38. data/test/data/test_collection_single_file.txt +38 -0
  39. data/test/data/test_collection_single_file_pcm.txt +38 -0
  40. data/test/eval_alignment_test.rb +31 -0
  41. data/test/eval_similarity_test.rb +28 -13
  42. data/test/find_pvalue_test.rb +10 -13
  43. data/test/find_threshold_test.rb +10 -5
  44. data/test/preprocess_collection_test.rb +36 -2
  45. data/test/scan_collection_test.rb +9 -4
  46. data/test/test_helper.rb +61 -2
  47. metadata +38 -12
  48. data/lib/macroape/exec/eval_alignment.rb +0 -125
  49. data/lib/macroape/exec/eval_similarity.rb +0 -108
  50. data/lib/macroape/exec/find_pvalue.rb +0 -81
  51. data/lib/macroape/exec/find_threshold.rb +0 -77
  52. data/lib/macroape/exec/preprocess_collection.rb +0 -101
  53. data/lib/macroape/exec/scan_collection.rb +0 -124
  54. data/test/eval_alignment_similarity_test.rb +0 -20
@@ -1,124 +0,0 @@
1
- help_string = %q{
2
- Command-line format:
3
- ruby scan_collection.rb <pat-file> <collection> [options]
4
- or in linux
5
- cat <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
6
- or on windows
7
- type <pat-file> | ruby scan_collection.rb .stdin <collection> [options]
8
-
9
- Options:
10
- [-p <P-value>]
11
- [-c <similarity cutoff (minimal similarity to be included in output)> ] or [--all], '-c 0.05' by default
12
- [--precise [<level, minimal similarity to check on a more precise discretization level on the second pass>]], off by default, '--precise 0.01' if level is not set
13
- [--silent] - don't show current progress information during scan (by default this information's written into stderr)
14
-
15
- Output format:
16
- <name> <similarity jaccard index> <shift> <overlap> <orientation> * [in case that result calculated on the second pass(in precise mode)]
17
- Attention! Name can contain whitespace characters.
18
- Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
19
-
20
- Example:
21
- ruby scan_collection.rb motifs/KLF4.pat collection.yaml -p 0.005
22
- or in linux
23
- cat motifs/KLF4.pat | ruby scan_collection.rb .stdin collection.yaml -p 0.005 --precise 0.03
24
- }
25
-
26
- $:.unshift File.join(File.dirname(__FILE__),'./../../')
27
- require 'macroape'
28
- require 'yaml'
29
-
30
- if ARGV.empty? or ARGV.include? '-h' or ARGV.include? '-help' or ARGV.include? '--help' or ARGV.include? '--h'
31
- STDERR.puts help_string
32
- exit
33
- end
34
-
35
- begin
36
- filename = ARGV.shift
37
- collection_file = ARGV.shift
38
- raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
39
- raise "No input. You'd specify input file with collection" unless collection_file
40
- raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
41
-
42
- pvalue = 0.0005
43
- cutoff = 0.05 # minimal similarity to output
44
- collection = YAML.load_file(collection_file)
45
- background_query = collection.background
46
-
47
- silent = false
48
- precision_mode = :rough
49
- until ARGV.empty?
50
- case ARGV.shift
51
- when '-bq'
52
- background_query = ARGV.shift(4).map(&:to_f)
53
- raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless background_query == background_query.reverse
54
- when '-p'
55
- pvalue = ARGV.shift.to_f
56
- when '-m'
57
- Macroape::MaxHashSizeSingle = ARGV.shift.to_f
58
- when '-md'
59
- Macroape::MaxHashSizeDouble = ARGV.shift.to_f
60
- when '-c'
61
- cutoff = ARGV.shift.to_f
62
- when '--all'
63
- cutoff = 0.0
64
- when '--silent'
65
- silent = true
66
- when '--precise'
67
- precision_mode = :precise
68
- begin
69
- Float(ARGV.first)
70
- minimal_similarity = ARGV.shift.to_f
71
- rescue
72
- minimal_similarity = 0.05
73
- end
74
- end
75
- end
76
- Macroape::MaxHashSizeSingle = 1000000 unless defined? Macroape::MaxHashSizeSingle
77
- Macroape::MaxHashSizeDouble = 1000 unless defined? Macroape::MaxHashSizeDouble
78
-
79
- raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
80
-
81
- if filename == '.stdin'
82
- # query_pwm = Macroape::SingleMatrix.load_from_stdin(STDIN)
83
- else
84
- raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
85
- query_pwm = Bioinform::PWM.new(File.read(filename))
86
- end
87
-
88
-
89
- query_pwm_rough = query_pwm.background(background_query).discrete(collection.rough_discretization)
90
- query_pwm_precise = query_pwm.background(background_query).discrete(collection.precise_discretization)
91
-
92
- threshold = query_pwm_rough.threshold(pvalue)
93
- threshold_precise = query_pwm_precise.threshold(pvalue)
94
-
95
- similarities = {}
96
- precision_file_mode = {}
97
-
98
- collection.pwms.each_key do |name|
99
- pwm = collection.pwms[name]
100
- pwm_info = collection.infos[name]
101
- STDERR.puts pwm.name unless silent
102
- cmp = Macroape::PWMCompare.new(query_pwm_rough, pwm.background(collection.background).discrete(collection.rough_discretization))
103
- info = cmp.jaccard(threshold, pwm_info[:rough][pvalue] * collection.rough_discretization)
104
- precision_file_mode[name] = :rough
105
-
106
- if precision_mode == :precise and info[:similarity] >= minimal_similarity
107
- cmp = Macroape::PWMCompare.new(query_pwm_precise, pwm.background(collection.background).discrete(collection.precise_discretization))
108
- info = cmp.jaccard(threshold_precise, pwm_info[:precise][pvalue] * collection.precise_discretization)
109
- precision_file_mode[name] = :precise
110
- end
111
- similarities[name] = info
112
- end
113
-
114
- puts "#pwm\tsimilarity\tshift\toverlap\torientation"
115
- similarities.sort_by do |name, info|
116
- info[:similarity]
117
- end.reverse.each do |name, info|
118
- precision_text = (precision_file_mode[name] == :precise) ? "\t*" : ""
119
- puts "#{name}\t#{info[:similarity]}\t#{info[:shift]}\t#{info[:overlap]}\t#{info[:orientation]}#{precision_text}" if info[:similarity] >= cutoff
120
- end
121
-
122
- rescue => err
123
- STDERR.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse -help option for help\n"
124
- end
@@ -1,20 +0,0 @@
1
- require 'test_helper'
2
-
3
- puts "\n\neval_alignment test:"
4
- class TestEvalAlignmentSimilarity < Test::Unit::TestCase
5
- def test_process_at_optimal_alignment
6
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')){|f|
7
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
8
- }
9
- end
10
- def test_process_not_optimal_alignment
11
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')){|f|
12
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", f.read
13
- }
14
- end
15
- def test_process_at_optimal_alignment_reversed
16
- IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')){|f|
17
- assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", f.read
18
- }
19
- end
20
- end