macroape 3.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/.gitignore +18 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +22 -0
  4. data/README.md +61 -0
  5. data/Rakefile +7 -0
  6. data/bin/eval_alignment +3 -0
  7. data/bin/eval_similarity +3 -0
  8. data/bin/find_pvalue +3 -0
  9. data/bin/find_threshold +3 -0
  10. data/bin/preprocess_collection +3 -0
  11. data/bin/scan_collection +3 -0
  12. data/lib/macroape/aligned_pair_intersection.rb +136 -0
  13. data/lib/macroape/aligned_pair_metrics.rb +24 -0
  14. data/lib/macroape/aligned_pair_transformations.rb +23 -0
  15. data/lib/macroape/collection.rb +15 -0
  16. data/lib/macroape/count_by_threshold.rb +34 -0
  17. data/lib/macroape/exec/eval_alignment.rb +141 -0
  18. data/lib/macroape/exec/eval_similarity.rb +107 -0
  19. data/lib/macroape/exec/find_pvalue.rb +80 -0
  20. data/lib/macroape/exec/find_threshold.rb +76 -0
  21. data/lib/macroape/exec/preprocess_collection.rb +94 -0
  22. data/lib/macroape/exec/scan_collection.rb +124 -0
  23. data/lib/macroape/extract_pwm.rb +32 -0
  24. data/lib/macroape/gauss_estimation.rb +30 -0
  25. data/lib/macroape/matrix_information.rb +29 -0
  26. data/lib/macroape/matrix_on_background.rb +16 -0
  27. data/lib/macroape/matrix_transformations.rb +29 -0
  28. data/lib/macroape/pair_metrics.rb +9 -0
  29. data/lib/macroape/pair_transformations.rb +28 -0
  30. data/lib/macroape/pwm_compare.rb +10 -0
  31. data/lib/macroape/pwm_compare_aligned.rb +13 -0
  32. data/lib/macroape/single_matrix.rb +45 -0
  33. data/lib/macroape/support.rb +34 -0
  34. data/lib/macroape/threshold_by_pvalue.rb +68 -0
  35. data/lib/macroape/version.rb +3 -0
  36. data/lib/macroape.rb +26 -0
  37. data/macroape.gemspec +17 -0
  38. data/test/data/AHR_si.pat +10 -0
  39. data/test/data/KLF4_f2.pat +11 -0
  40. data/test/data/KLF4_f2_scan_results_all.txt +4 -0
  41. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +3 -0
  42. data/test/data/KLF4_f2_scan_results_precise_mode.txt +4 -0
  43. data/test/data/SP1_f1.pat +12 -0
  44. data/test/data/SP1_f1_revcomp.pat +12 -0
  45. data/test/data/test_collection/GABPA_f1.pat +14 -0
  46. data/test/data/test_collection/KLF4_f2.pat +11 -0
  47. data/test/data/test_collection/SP1_f1.pat +12 -0
  48. data/test/data/test_collection.yaml +186 -0
  49. data/test/macroape_test.rb +125 -0
  50. metadata +116 -0
@@ -0,0 +1,68 @@
1
module PWM
  # Mixin that converts P-values into score thresholds for a weight matrix.
  #
  # The including object must respond to: +matrix+, +length+, +probabilities+,
  # +sum_of_probabilities+, +number_of_words+, +best_suffix+, +best_score+ and
  # +threshold_gauss_estimation+ (all of them are called below but defined
  # elsewhere).
  module ThresholdByPvalue
    # Returns the threshold for a single P-value.
    #
    # @param pvalue [Numeric] requested P-value
    # @return [Float, nil] the threshold, or nil when no threshold could be
    #   determined for the requested P-value
    def threshold(pvalue)
      thresholds(pvalue) { |_, thresh, _| return thresh }
      nil
    end

    # Yields +pvalue, threshold, real_pvalue+ for every requested P-value,
    # in ascending order of P-value.
    #
    # +threshold+ is the left boundary score plus 0.1 (presumably this moves
    # the cutoff strictly above the left score on a discretized matrix --
    # TODO confirm); +real_pvalue+ is the right-boundary count divided by
    # +number_of_words+.
    #
    # @param pvalues [Array<Numeric>] requested P-values
    # @return [Hash, Enumerator] the hash produced by #thresholds_by_pvalues
    #   when a block is given, otherwise an Enumerator over the same triples
    def thresholds(*pvalues)
      return enum_for(:thresholds, *pvalues) unless block_given?

      thresholds_by_pvalues(*pvalues).each do |pvalue, (left_threshold, _left_count, _right_threshold, right_count)|
        yield pvalue, left_threshold + 0.1, right_count.to_f / number_of_words
      end
    end

    # Finds, for every P-value, the two neighbouring scores between which the
    # cumulative count (summed from the highest score down) crosses the
    # target count +pvalue * sum_of_probabilities**length+.
    #
    # @param pvalues [Array<Numeric>] requested P-values
    # @return [Hash{Numeric => Array<Float>}]
    #   {pvalue => [left_threshold, left_count, right_threshold, right_count]};
    #   empty when no P-values are given
    def thresholds_by_pvalues(*pvalues)
      return {} if pvalues.empty?

      total_count = sum_of_probabilities**length
      estimation_pvalue = pvalues.max
      max_look_for_count = estimation_pvalue * total_count

      scores = {}
      until scores.values.inject(0) { |sum, count| sum + count } >= max_look_for_count
        scores = calculate_count_distribution_after_threshold(threshold_gauss_estimation(estimation_pvalue))
        # if the estimation covered too few words, lower the estimated
        # threshold by doubling the P-value and try again
        estimation_pvalue *= 2
      end

      # [pvalue, target count] pairs, ascending by P-value, duplicates removed
      look_for_counts = pvalues.sort.uniq.map { |pvalue| [pvalue, pvalue * total_count] }
      sorted_scores = scores.sort.reverse # highest score first
      results = {}
      sum_count = 0

      sorted_scores.each_with_index do |(score, count), i|
        # usually this 'while' works as 'if'
        while !look_for_counts.empty? && sum_count + count > look_for_counts.first[1]
          pval, _target_count = look_for_counts.shift
          results[pval] =
            if i > 0
              [score.to_f, (sum_count + count).to_f, sorted_scores[i - 1][0].to_f, sum_count.to_f]
            else
              # already the best score exceeds the target count: there is no
              # higher score, so the right boundary is placed above best_score
              [score.to_f, (sum_count + count).to_f, best_score + 1.0, 0.0]
            end
        end
        sum_count += count
      end
      results
    end

    # Builds the distribution {score => weighted count} of full-length words,
    # column by column, dropping every partial score for which
    # +partial + best_suffix[column + 1]+ is below +threshold+.
    #
    # @param threshold [Numeric] minimal score to keep
    # @return [Hash{Numeric => Numeric}] score => count
    # @raise [RuntimeError] when the constant MaxHashSize is defined and an
    #   intermediate distribution grows beyond it
    def calculate_count_distribution_after_threshold(threshold)
      scores = { 0 => 1 }
      length.times do |column|
        new_scores = Hash.new(0)
        scores.each do |score, count|
          4.times do |letter|
            new_score = score + matrix[column][letter]
            if new_score + best_suffix[column + 1] >= threshold
              new_scores[new_score] += count * probabilities[letter]
            end
          end
        end
        # parentheses matter: `defined? A && b` would test the whole expression
        if defined?(MaxHashSize) && new_scores.size > MaxHashSize
          raise 'Hash overflow in PWM::ThresholdByPvalue#calculate_count_distribution_after_threshold'
        end
        scores = new_scores
      end
      scores
    end
  end
end
@@ -0,0 +1,3 @@
1
module Macroape
  # Version string of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "3.2.2".freeze
end
data/lib/macroape.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'macroape/version'
2
+ require 'yaml'
3
+
4
+ require 'macroape/extract_pwm'
5
+ require 'macroape/support'
6
+ require 'macroape/matrix_transformations'
7
+ require 'macroape/matrix_information'
8
+ require 'macroape/gauss_estimation'
9
+ require 'macroape/threshold_by_pvalue'
10
+ require 'macroape/single_matrix'
11
+ require 'macroape/count_by_threshold'
12
+ require 'macroape/matrix_on_background'
13
+
14
+ require 'macroape/aligned_pair_transformations'
15
+ require 'macroape/aligned_pair_metrics'
16
+ require 'macroape/aligned_pair_intersection'
17
+ require 'macroape/pwm_compare_aligned'
18
+
19
+ require 'macroape/pair_transformations'
20
+ require 'macroape/pair_metrics'
21
+ require 'macroape/pwm_compare'
22
+ require 'macroape/collection'
23
+
24
# Top-level namespace of the gem. Nothing is defined here directly; the
# files required by this entry point supply the functionality.
module Macroape
end
data/macroape.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/macroape/version', __FILE__)
3
+
4
# Gem specification: package metadata plus the list of shipped files, which
# is taken from the files tracked by git.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure and best alignment between a pair of Position Weight Matrices(PWM), finding thresholds by P-values and inside out and even searching a collection of motifs for the most similar ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw}
  gem.summary       = %q{PWM comparison tool using MACROAPE approach}
  gem.homepage      = "http://autosome.ru/macroape/"

  # NUL-separated output keeps file names that contain whitespace intact;
  # splitting on $\ (nil by default) would break them at every space.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "macroape"
  gem.require_paths = ["lib"]
  gem.version       = Macroape::VERSION
end
@@ -0,0 +1,10 @@
1
+ AHR_si
2
+ 0.049659785047588834 -0.7112292711652757 0.3721858143748157 0.007118345755728267
3
+ -1.1863517717347094 -1.107677589450009 -0.10122262270571995 0.9004092051347228
4
+ -0.5481419620395529 0.12723771473208506 -0.5930888656288792 0.5502420250441413
5
+ -2.3460850844035757 -2.7362137930742167 1.3086872088653387 -1.9795601218727432
6
+ -3.45212725495741 1.3387809407527818 -2.67026944276278 -2.46866856874775
7
+ -2.1349735467777395 -2.965000508388952 1.3322808172765488 -3.206089427067848
8
+ -3.45212725495741 -2.1349735467777395 -3.0087911229263207 1.335191259740944
9
+ -3.45212725495741 -3.45212725495741 1.3622489404156526 -3.45212725495741
10
+ 0.10927007207680632 0.5389064901558549 -0.8250174241334556 -0.3117320885632525
@@ -0,0 +1,11 @@
1
+ KLF4_f2.xml
2
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
5
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
6
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
7
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
8
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
9
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
10
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
11
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
@@ -0,0 +1,4 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct
3
+ SP1_f1 0.22754919499105544 -1 10 direct
4
+ GABPA_f1 0.00043527658136684877 -8 5 direct
@@ -0,0 +1,3 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct
3
+ SP1_f1 0.22754919499105544 -1 10 direct
@@ -0,0 +1,4 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct *
3
+ SP1_f1 0.2420758234928527 -1 10 direct *
4
+ GABPA_f1 0.00043527658136684877 -8 5 direct
@@ -0,0 +1,12 @@
1
+ > SP1_f1
2
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
3
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
4
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
5
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
6
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
7
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
8
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
9
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
10
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
11
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
12
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
@@ -0,0 +1,12 @@
1
+ SP1_f1_revcomp
2
+ -0.6825482097865606 -0.28626775700282125 0.7641033353626651 -0.5166047365590577
3
+ -1.6080243648157357 1.163676006374752 -1.2251832285630033 -1.1896356092245055
4
+ -1.7780413702431377 1.126543157436868 -2.2510053061629707 -0.4450938582835542
5
+ -2.0846739035376483 1.338748858978805 -3.138426807809667 -4.015548413965584
6
+ -2.3243342594990253 1.3113096718240569 -2.9784027708801153 -1.9643526491643326
7
+ -0.851098348782244 -2.624685764808605 0.9449196547620103 -0.07473964149330914
8
+ -4.8899306705082335 1.3510439136452728 -3.719643631330185 -2.2369752892820087
9
+ -5.077972423609655 1.3684371349982631 -2.916163300253228 -4.563896055436894
10
+ -4.338927525031567 1.1807800242010371 -2.973985191321805 -0.387227623476054
11
+ -0.6138563775211981 1.0960627561110399 -2.1119259694238686 -1.0631255752097801
12
+ -1.1060188862599292 0.8657012535789861 -0.6748234046937317 -0.24435707885585334
@@ -0,0 +1,14 @@
1
+ GABPA_f1
2
+ -0.1106670158341858 0.013801606113892391 0.6054596108973699 -1.3518085041421573
3
+ 0.37030668921643345 0.15761121480429963 0.009069314183831202 -0.9888619717703562
4
+ 0.47526546359546684 -0.3011678534572083 0.4031522994412777 -1.8638752827041059
5
+ -1.5544255540164373 1.1082369687811506 -0.2814091552834454 -5.30708531823271
6
+ -0.6362037835776368 1.235338189985594 -3.5801322928552253 -5.717323067092849
7
+ -5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
8
+ -5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
9
+ 1.3835219739184708 -5.2341956006430985 -5.852906870733575 -5.852906870733575
10
+ 1.3756340514956562 -5.394962755562375 -5.394962755562375 -3.401117964959733
11
+ -1.2176198315414444 -3.109079898175411 1.2964067931472216 -5.717323067092849
12
+ -1.3716559438167257 -0.2761401935045069 -1.8504445165866068 1.0404320473626856
13
+ -0.5440863133031895 -0.48103682561971345 0.907381908447086 -1.1280642594012078
14
+ 0.10557340209290218 -0.01814819455289191 0.4381106695354074 -1.0304105539540915
@@ -0,0 +1,11 @@
1
+ KLF4_f2
2
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
5
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
6
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
7
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
8
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
9
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
10
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
11
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
@@ -0,0 +1,12 @@
1
+ > SP1_f1
2
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
3
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
4
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
5
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
6
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
7
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
8
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
9
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
10
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
11
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
12
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:PWM::Collection
2
+ pvalues:
3
+ - 0.0005
4
+ - 0.0001
5
+ - 5.0e-05
6
+ background:
7
+ - 1
8
+ - 1
9
+ - 1
10
+ - 1
11
+ precise_discretization: 10
12
+ rough_discretization: 1
13
+ pwms:
14
+ GABPA_f1: !ruby/object:PWM::SingleMatrix
15
+ matrix:
16
+ - - -0.1106670158341858
17
+ - 0.013801606113892391
18
+ - 0.6054596108973699
19
+ - -1.3518085041421573
20
+ - - 0.37030668921643345
21
+ - 0.15761121480429963
22
+ - 0.009069314183831202
23
+ - -0.9888619717703562
24
+ - - 0.47526546359546684
25
+ - -0.3011678534572083
26
+ - 0.4031522994412777
27
+ - -1.8638752827041059
28
+ - - -1.5544255540164373
29
+ - 1.1082369687811506
30
+ - -0.2814091552834454
31
+ - -5.30708531823271
32
+ - - -0.6362037835776368
33
+ - 1.235338189985594
34
+ - -3.5801322928552253
35
+ - -5.717323067092849
36
+ - - -5.852906870733575
37
+ - -5.852906870733575
38
+ - 1.3841383838057746
39
+ - -5.852906870733575
40
+ - - -5.852906870733575
41
+ - -5.852906870733575
42
+ - 1.3841383838057746
43
+ - -5.852906870733575
44
+ - - 1.3835219739184708
45
+ - -5.2341956006430985
46
+ - -5.852906870733575
47
+ - -5.852906870733575
48
+ - - 1.3756340514956562
49
+ - -5.394962755562375
50
+ - -5.394962755562375
51
+ - -3.401117964959733
52
+ - - -1.2176198315414444
53
+ - -3.109079898175411
54
+ - 1.2964067931472216
55
+ - -5.717323067092849
56
+ - - -1.3716559438167257
57
+ - -0.2761401935045069
58
+ - -1.8504445165866068
59
+ - 1.0404320473626856
60
+ - - -0.5440863133031895
61
+ - -0.48103682561971345
62
+ - 0.907381908447086
63
+ - -1.1280642594012078
64
+ - - 0.10557340209290218
65
+ - -0.01814819455289191
66
+ - 0.4381106695354074
67
+ - -1.0304105539540915
68
+ name: GABPA_f1
69
+ KLF4_f2: !ruby/object:PWM::SingleMatrix
70
+ matrix:
71
+ - - 0.30861857265872605
72
+ - -2.254321000121579
73
+ - 0.13505703522674192
74
+ - 0.3285194224375633
75
+ - - -1.227018967707036
76
+ - -4.814127713368663
77
+ - 1.3059890687390967
78
+ - -4.908681463544344
79
+ - - -2.443469374521196
80
+ - -4.648238485031404
81
+ - 1.3588686548279805
82
+ - -4.441801801188402
83
+ - - -2.7177827948276123
84
+ - -3.8073538975356565
85
+ - 1.356272809724262
86
+ - -3.504104725510225
87
+ - - -0.5563232977367343
88
+ - 0.5340697765121405
89
+ - -3.61417723090579
90
+ - 0.5270259776377405
91
+ - - -1.8687622060887386
92
+ - -4.381483976582316
93
+ - 1.337932245336098
94
+ - -3.815629658877517
95
+ - - -2.045671123823928
96
+ - -2.384975142213679
97
+ - 0.7198551207724355
98
+ - 0.5449254135616948
99
+ - - -1.373157530374372
100
+ - -3.0063112097748217
101
+ - 1.285188335493552
102
+ - -2.5026044231773543
103
+ - - -2.1030513122772208
104
+ - -1.8941348100402244
105
+ - 1.249265758393991
106
+ - -1.4284210948906104
107
+ - - -1.3277128628152939
108
+ - 0.8982415633049462
109
+ - -0.8080773665408135
110
+ - -0.18161647647456935
111
+ name: KLF4_f2
112
+ SP1_f1: !ruby/object:PWM::SingleMatrix
113
+ matrix:
114
+ - - -0.24435707885585334
115
+ - -0.6748234046937317
116
+ - 0.8657012535789861
117
+ - -1.1060188862599292
118
+ - - -1.0631255752097801
119
+ - -2.1119259694238686
120
+ - 1.0960627561110399
121
+ - -0.6138563775211981
122
+ - - -0.387227623476054
123
+ - -2.973985191321805
124
+ - 1.1807800242010371
125
+ - -4.338927525031567
126
+ - - -4.563896055436894
127
+ - -2.916163300253228
128
+ - 1.3684371349982631
129
+ - -5.077972423609655
130
+ - - -2.2369752892820087
131
+ - -3.719643631330185
132
+ - 1.3510439136452728
133
+ - -4.8899306705082335
134
+ - - -0.07473964149330914
135
+ - 0.9449196547620103
136
+ - -2.624685764808605
137
+ - -0.851098348782244
138
+ - - -1.9643526491643326
139
+ - -2.9784027708801153
140
+ - 1.3113096718240569
141
+ - -2.3243342594990253
142
+ - - -4.015548413965584
143
+ - -3.138426807809667
144
+ - 1.338748858978805
145
+ - -2.0846739035376483
146
+ - - -0.4450938582835542
147
+ - -2.2510053061629707
148
+ - 1.126543157436868
149
+ - -1.7780413702431377
150
+ - - -1.1896356092245055
151
+ - -1.2251832285630033
152
+ - 1.163676006374752
153
+ - -1.6080243648157357
154
+ - - -0.5166047365590577
155
+ - 0.7641033353626651
156
+ - -0.28626775700282125
157
+ - -0.6825482097865606
158
+ name: SP1_f1
159
+ infos:
160
+ GABPA_f1:
161
+ :rough:
162
+ 5.0e-05: 16.1
163
+ 0.0001: 15.1
164
+ 0.0005: 12.1
165
+ :precise:
166
+ 5.0e-05: 8.61
167
+ 0.0001: 7.609999999999999
168
+ 0.0005: 4.51
169
+ KLF4_f2:
170
+ :rough:
171
+ 5.0e-05: 14.1
172
+ 0.0001: 13.1
173
+ 0.0005: 11.1
174
+ :precise:
175
+ 5.0e-05: 8.51
176
+ 0.0001: 7.909999999999999
177
+ 0.0005: 5.8100000000000005
178
+ SP1_f1:
179
+ :rough:
180
+ 5.0e-05: 14.1
181
+ 0.0001: 14.1
182
+ 0.0005: 11.1
183
+ :precise:
184
+ 5.0e-05: 8.51
185
+ 0.0001: 7.709999999999999
186
+ 0.0005: 5.61
@@ -0,0 +1,125 @@
1
+ require 'test/unit'
2
+
3
# Shared helpers for the command-line tests.
module Helpers
  # Runs the +find_pvalue+ executable with the given argument string and
  # returns the last whitespace-separated token of its output.
  #
  # @param args [String] command-line arguments, interpolated verbatim
  # @return [String, nil] last token of the output, nil if the output is blank
  def self.obtain_pvalue_by_threshold(args)
    IO.popen("find_pvalue #{args}", &:read).strip.split.last
  end
end
8
+
9
# Tests of the +find_threshold+ executable. Every reported "real" P-value is
# cross-checked against what +find_pvalue+ prints for the reported threshold.
class FindThresholdTest < Test::Unit::TestCase
  # Several P-values on one command line: one tab-separated line per P-value,
  # reported in ascending order.
  def test_process_several_pvalues
    pvalues = []
    IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001 0.0005', &:read).each_line do |line|
      pvalue, threshold, real_pvalue = line.strip.split("\t")
      pvalues << pvalue
      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
    end
    # expected value goes first so a failure message reads correctly
    assert_equal ['0.0005', '0.001'], pvalues
  end

  def test_process_one_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001', &:read).strip.split("\t")
    assert_equal '0.001', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # Without -p the tool is expected to report P-value 0.0005.
  def test_process_default_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # The same -d value is passed to both tools so their results are comparable.
  def test_custom_discretization
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -d 100', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
  end
end
35
+
36
# Tests of the +find_pvalue+ executable: each output line is compared
# verbatim with a tab-separated "threshold, count, P-value" line.
class FindPvalueTest < Test::Unit::TestCase
  def test_process_one_threshold
    IO.popen('find_pvalue test/data/KLF4_f2.pat 4.1719') do |f|
      assert_equal "4.1719\t1048.0\t0.00099945068359375\n", f.read
    end
  end

  def test_process_several_thresholds
    IO.popen('find_pvalue test/data/KLF4_f2.pat 4.1719 5.2403') do |f|
      assert_equal "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n", f.read
    end
  end

  # Output lines must follow the order of the thresholds on the command line.
  def test_process_several_thresholds_result_is_ordered
    IO.popen('find_pvalue test/data/KLF4_f2.pat 5.2403 4.1719') do |f|
      assert_equal "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n", f.read
    end
  end

  def test_custom_discretization
    IO.popen('find_pvalue test/data/KLF4_f2.pat 5.2403 -d 100') do |f|
      assert_equal "5.2403\t527.0\t0.0005025863647460938\n", f.read
    end
  end
end
58
+
59
+
60
# Tests of the +eval_similarity+ executable: the complete five-line output
# is compared verbatim with the expected text.
class TestEvalSimilarity < Test::Unit::TestCase
  def test_process_pair_of_pwms
    IO.popen('eval_similarity test/data/KLF4_f2.pat test/data/SP1_f1.pat') do |f|
      assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
    end
  end

  def test_process_another_pair_of_pwms
    IO.popen('eval_similarity test/data/SP1_f1.pat test/data/AHR_si.pat') do |f|
      assert_equal "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n", f.read
    end
  end

  # A matrix compared with its own reverse complement must be reported as a
  # perfect match in the revcomp orientation.
  def test_recognize_orientation_of_alignment
    IO.popen('eval_similarity test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat') do |f|
      assert_equal "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n", f.read
    end
  end

  def test_process_custom_discretization
    IO.popen('eval_similarity test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1') do |f|
      assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n", f.read
    end
  end
end
84
+
85
# Tests of the +eval_alignment+ executable, which is given an explicit shift
# and orientation; the complete output is compared verbatim.
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
  # Shift -1 / direct yields the same output that TestEvalSimilarity expects
  # from eval_similarity for this pair of matrices.
  def test_process_at_optimal_alignment
    IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct ') do |f|
      assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
    end
  end

  def test_process_not_optimal_alignment
    IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct ') do |f|
      assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", f.read
    end
  end

  def test_process_at_optimal_alignment_reversed
    IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp ') do |f|
      assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", f.read
    end
  end
end
102
+
103
# Test of the +preprocess_collection+ executable: the generated YAML must be
# identical to the reference file test/data/test_collection.yaml.
class TestPreprocessCollection < Test::Unit::TestCase
  def test_multipvalue_preproceessing
    tmp_file = 'test/data/test_collection.yaml.tmp'
    system('preprocess_collection ./test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
    assert_equal File.read('test/data/test_collection.yaml'), File.read(tmp_file)
  ensure
    # remove the temporary file even when the assertion (or the read) fails
    File.delete(tmp_file) if File.exist?(tmp_file)
  end
end
110
+
111
# Tests of the +scan_collection+ executable: its output is compared with the
# reference result files stored in test/data.
class TestScanCollection < Test::Unit::TestCase
  def test_scan_default_cutoff
    assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt'),
                 IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --silent', &:read)
  end

  def test_scan_and_output_all_results
    assert_equal File.read('test/data/KLF4_f2_scan_results_all.txt'),
                 IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent', &:read)
  end

  def test_scan_precise_mode
    assert_equal File.read('test/data/KLF4_f2_scan_results_precise_mode.txt'),
                 IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent', &:read)
  end
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: macroape
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.2.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ilya Vorontsov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-28 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
15
+ Estimation. It's a bioinformatic tool for evaluating similarity measure and best
16
+ alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
17
+ P-values and inside out and even searching a collection of motifs for the most similar
18
+ ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
19
+ email:
20
+ - prijutme4ty@gmail.com
21
+ executables:
22
+ - eval_alignment
23
+ - eval_similarity
24
+ - find_pvalue
25
+ - find_threshold
26
+ - preprocess_collection
27
+ - scan_collection
28
+ extensions: []
29
+ extra_rdoc_files: []
30
+ files:
31
+ - .gitignore
32
+ - Gemfile
33
+ - LICENSE
34
+ - README.md
35
+ - Rakefile
36
+ - bin/eval_alignment
37
+ - bin/eval_similarity
38
+ - bin/find_pvalue
39
+ - bin/find_threshold
40
+ - bin/preprocess_collection
41
+ - bin/scan_collection
42
+ - lib/macroape.rb
43
+ - lib/macroape/aligned_pair_intersection.rb
44
+ - lib/macroape/aligned_pair_metrics.rb
45
+ - lib/macroape/aligned_pair_transformations.rb
46
+ - lib/macroape/collection.rb
47
+ - lib/macroape/count_by_threshold.rb
48
+ - lib/macroape/exec/eval_alignment.rb
49
+ - lib/macroape/exec/eval_similarity.rb
50
+ - lib/macroape/exec/find_pvalue.rb
51
+ - lib/macroape/exec/find_threshold.rb
52
+ - lib/macroape/exec/preprocess_collection.rb
53
+ - lib/macroape/exec/scan_collection.rb
54
+ - lib/macroape/extract_pwm.rb
55
+ - lib/macroape/gauss_estimation.rb
56
+ - lib/macroape/matrix_information.rb
57
+ - lib/macroape/matrix_on_background.rb
58
+ - lib/macroape/matrix_transformations.rb
59
+ - lib/macroape/pair_metrics.rb
60
+ - lib/macroape/pair_transformations.rb
61
+ - lib/macroape/pwm_compare.rb
62
+ - lib/macroape/pwm_compare_aligned.rb
63
+ - lib/macroape/single_matrix.rb
64
+ - lib/macroape/support.rb
65
+ - lib/macroape/threshold_by_pvalue.rb
66
+ - lib/macroape/version.rb
67
+ - macroape.gemspec
68
+ - test/data/AHR_si.pat
69
+ - test/data/KLF4_f2.pat
70
+ - test/data/KLF4_f2_scan_results_all.txt
71
+ - test/data/KLF4_f2_scan_results_default_cutoff.txt
72
+ - test/data/KLF4_f2_scan_results_precise_mode.txt
73
+ - test/data/SP1_f1.pat
74
+ - test/data/SP1_f1_revcomp.pat
75
+ - test/data/test_collection.yaml
76
+ - test/data/test_collection/GABPA_f1.pat
77
+ - test/data/test_collection/KLF4_f2.pat
78
+ - test/data/test_collection/SP1_f1.pat
79
+ - test/macroape_test.rb
80
+ homepage: http://autosome.ru/macroape/
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 1.8.24
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: PWM comparison tool using MACROAPE approach
104
+ test_files:
105
+ - test/data/AHR_si.pat
106
+ - test/data/KLF4_f2.pat
107
+ - test/data/KLF4_f2_scan_results_all.txt
108
+ - test/data/KLF4_f2_scan_results_default_cutoff.txt
109
+ - test/data/KLF4_f2_scan_results_precise_mode.txt
110
+ - test/data/SP1_f1.pat
111
+ - test/data/SP1_f1_revcomp.pat
112
+ - test/data/test_collection.yaml
113
+ - test/data/test_collection/GABPA_f1.pat
114
+ - test/data/test_collection/KLF4_f2.pat
115
+ - test/data/test_collection/SP1_f1.pat
116
+ - test/macroape_test.rb