macroape 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. data/.gitignore +18 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +22 -0
  4. data/README.md +61 -0
  5. data/Rakefile +7 -0
  6. data/bin/eval_alignment +3 -0
  7. data/bin/eval_similarity +3 -0
  8. data/bin/find_pvalue +3 -0
  9. data/bin/find_threshold +3 -0
  10. data/bin/preprocess_collection +3 -0
  11. data/bin/scan_collection +3 -0
  12. data/lib/macroape/aligned_pair_intersection.rb +136 -0
  13. data/lib/macroape/aligned_pair_metrics.rb +24 -0
  14. data/lib/macroape/aligned_pair_transformations.rb +23 -0
  15. data/lib/macroape/collection.rb +15 -0
  16. data/lib/macroape/count_by_threshold.rb +34 -0
  17. data/lib/macroape/exec/eval_alignment.rb +141 -0
  18. data/lib/macroape/exec/eval_similarity.rb +107 -0
  19. data/lib/macroape/exec/find_pvalue.rb +80 -0
  20. data/lib/macroape/exec/find_threshold.rb +76 -0
  21. data/lib/macroape/exec/preprocess_collection.rb +94 -0
  22. data/lib/macroape/exec/scan_collection.rb +124 -0
  23. data/lib/macroape/extract_pwm.rb +32 -0
  24. data/lib/macroape/gauss_estimation.rb +30 -0
  25. data/lib/macroape/matrix_information.rb +29 -0
  26. data/lib/macroape/matrix_on_background.rb +16 -0
  27. data/lib/macroape/matrix_transformations.rb +29 -0
  28. data/lib/macroape/pair_metrics.rb +9 -0
  29. data/lib/macroape/pair_transformations.rb +28 -0
  30. data/lib/macroape/pwm_compare.rb +10 -0
  31. data/lib/macroape/pwm_compare_aligned.rb +13 -0
  32. data/lib/macroape/single_matrix.rb +45 -0
  33. data/lib/macroape/support.rb +34 -0
  34. data/lib/macroape/threshold_by_pvalue.rb +68 -0
  35. data/lib/macroape/version.rb +3 -0
  36. data/lib/macroape.rb +26 -0
  37. data/macroape.gemspec +17 -0
  38. data/test/data/AHR_si.pat +10 -0
  39. data/test/data/KLF4_f2.pat +11 -0
  40. data/test/data/KLF4_f2_scan_results_all.txt +4 -0
  41. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +3 -0
  42. data/test/data/KLF4_f2_scan_results_precise_mode.txt +4 -0
  43. data/test/data/SP1_f1.pat +12 -0
  44. data/test/data/SP1_f1_revcomp.pat +12 -0
  45. data/test/data/test_collection/GABPA_f1.pat +14 -0
  46. data/test/data/test_collection/KLF4_f2.pat +11 -0
  47. data/test/data/test_collection/SP1_f1.pat +12 -0
  48. data/test/data/test_collection.yaml +186 -0
  49. data/test/macroape_test.rb +125 -0
  50. metadata +116 -0
@@ -0,0 +1,68 @@
1
module PWM
  # Mixin that converts P-values into score thresholds by counting the words
  # whose score reaches a threshold.
  #
  # The including object must provide +matrix+, +length+, +probabilities+,
  # +sum_of_probabilities+, +number_of_words+, +best_score+, +best_suffix+ and
  # +threshold_gauss_estimation+; they are called here but defined elsewhere.
  module ThresholdByPvalue
    # Returns the threshold computed for a single P-value.
    #
    # If #thresholds yields nothing for +pvalue+, the (empty) hash returned by
    # #thresholds is returned instead of a number.
    def threshold(pvalue)
      thresholds(pvalue) { |_, thresh, _| return thresh }
    end

    # Yields +pvalue, threshold, real_pvalue+ for every requested P-value that
    # #thresholds_by_pvalues produced a result for, in ascending P-value order.
    #
    # +threshold+ is the left threshold shifted up by 0.1; +real_pvalue+ is the
    # right count divided by +number_of_words+.
    # NOTE(review): the 0.1 shift presumably relies on scores being discretized
    # to integers by the caller - confirm.
    #
    # Without a block an Enumerator over the same triples is returned.
    def thresholds(*pvalues)
      return enum_for(:thresholds, *pvalues) unless block_given?

      thresholds_by_pvalues(*pvalues).each do |pvalue, (left_threshold, _left_count, _right_threshold, right_count)|
        yield pvalue, left_threshold + 0.1, right_count.to_f / number_of_words
      end
    end

    # Finds, for each P-value, the two neighbouring scores between which the
    # requested word count is crossed.
    #
    # Returns a hash
    #   {pvalue => [left_threshold, left_count, right_threshold, right_count]}
    # where all four numbers are Floats. +left_threshold+ is the score at which
    # the cumulative count (taken from the highest score downwards) first
    # exceeds the target count and +left_count+ is that cumulative count;
    # +right_threshold+/+right_count+ describe the previous (higher) score.
    # When the very first score already exceeds the target, the right pair is
    # +best_score + 1.0+ and +0.0+.
    #
    # Returns an empty hash when no P-values are given.
    # Raises ArgumentError when a P-value is greater than 1, because the
    # required count could then never be reached and the search would not stop.
    def thresholds_by_pvalues(*pvalues)
      return {} if pvalues.empty?

      max_pvalue = pvalues.max
      raise ArgumentError, "P-value must not exceed 1 (got #{max_pvalue})" if max_pvalue > 1

      total_count = sum_of_probabilities**length
      max_look_for_count = max_pvalue * total_count

      scores = {}
      until scores.values.inject(0) { |sum, count| sum + count } >= max_look_for_count
        scores = calculate_count_distribution_after_threshold(threshold_gauss_estimation(max_pvalue))
        # if the estimation counted too few words, lower the estimated
        # threshold by doubling the P-value and try again
        max_pvalue *= 2
      end

      # [pvalue, target count] pairs in ascending P-value order, duplicates dropped
      look_for_counts = pvalues.sort.uniq.map { |pvalue| [pvalue, pvalue * total_count] }

      sorted_scores = scores.sort.reverse
      results = {}
      sum_count = 0
      sorted_scores.each_with_index do |(score, count), i|
        sum_count_after = sum_count + count
        # usually this 'while' works as 'if'
        while !look_for_counts.empty? && sum_count_after > look_for_counts.first[1]
          pvalue, _target = look_for_counts.shift
          results[pvalue] =
            if i > 0
              [score.to_f, sum_count_after.to_f, sorted_scores[i - 1][0].to_f, sum_count.to_f]
            else
              [score.to_f, sum_count_after.to_f, best_score + 1.0, 0.0]
            end
        end
        sum_count = sum_count_after
      end
      results
    end

    # Builds a hash {score => weighted word count} column by column, keeping
    # only partial scores that can still reach +threshold+ when completed with
    # the best possible suffix (+best_suffix[column + 1]+).
    #
    # Each word contributes the product of +probabilities+ of its letters.
    # Raises RuntimeError if the constant MaxHashSize is defined and the
    # intermediate hash grows beyond it.
    def calculate_count_distribution_after_threshold(threshold)
      scores = { 0 => 1 }
      length.times do |column|
        new_scores = Hash.new(0)
        scores.each do |score, count|
          4.times do |letter|
            new_score = score + matrix[column][letter]
            next unless new_score + best_suffix[column + 1] >= threshold

            new_scores[new_score] += count * probabilities[letter]
          end
        end
        if defined?(MaxHashSize) && new_scores.size > MaxHashSize
          raise 'Hash overflow in PWM::ThresholdByPvalue#calculate_count_distribution_after_threshold'
        end
        scores = new_scores
      end
      scores
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the macroape gem; this file only contributes the version constant.
module Macroape
  # Version string of the gem (read by the gemspec as Macroape::VERSION).
  VERSION = '3.2.2'
end
data/lib/macroape.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'macroape/version'
2
+ require 'yaml'
3
+
4
+ require 'macroape/extract_pwm'
5
+ require 'macroape/support'
6
+ require 'macroape/matrix_transformations'
7
+ require 'macroape/matrix_information'
8
+ require 'macroape/gauss_estimation'
9
+ require 'macroape/threshold_by_pvalue'
10
+ require 'macroape/single_matrix'
11
+ require 'macroape/count_by_threshold'
12
+ require 'macroape/matrix_on_background'
13
+
14
+ require 'macroape/aligned_pair_transformations'
15
+ require 'macroape/aligned_pair_metrics'
16
+ require 'macroape/aligned_pair_intersection'
17
+ require 'macroape/pwm_compare_aligned'
18
+
19
+ require 'macroape/pair_transformations'
20
+ require 'macroape/pair_metrics'
21
+ require 'macroape/pwm_compare'
22
+ require 'macroape/collection'
23
+
24
# Top-level namespace of the gem. Nothing is defined in it here; this file
# only loads the library files listed in the require statements above.
module Macroape
end
data/macroape.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/macroape/version', __FILE__)
3
+
4
# Gem specification for macroape. The file list is taken from git, so the
# gem has to be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure and best alignment between a pair of Position Weight Matrices(PWM), finding thresholds by P-values and inside out and even searching a collection of motifs for the most similar ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw}
  gem.summary       = %q{PWM comparison tool using MACROAPE approach}
  gem.homepage      = "http://autosome.ru/macroape/"

  # `git ls-files` prints one path per line, so split on newlines. The former
  # separator $\ (output record separator) is nil unless something sets it,
  # which made String#split fall back to splitting on any whitespace and
  # broke paths containing spaces.
  gem.files         = `git ls-files`.split("\n")
  # Every tracked file under bin/ becomes an executable of the gem.
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "macroape"
  gem.require_paths = ["lib"]
  gem.version       = Macroape::VERSION
end
@@ -0,0 +1,10 @@
1
+ AHR_si
2
+ 0.049659785047588834 -0.7112292711652757 0.3721858143748157 0.007118345755728267
3
+ -1.1863517717347094 -1.107677589450009 -0.10122262270571995 0.9004092051347228
4
+ -0.5481419620395529 0.12723771473208506 -0.5930888656288792 0.5502420250441413
5
+ -2.3460850844035757 -2.7362137930742167 1.3086872088653387 -1.9795601218727432
6
+ -3.45212725495741 1.3387809407527818 -2.67026944276278 -2.46866856874775
7
+ -2.1349735467777395 -2.965000508388952 1.3322808172765488 -3.206089427067848
8
+ -3.45212725495741 -2.1349735467777395 -3.0087911229263207 1.335191259740944
9
+ -3.45212725495741 -3.45212725495741 1.3622489404156526 -3.45212725495741
10
+ 0.10927007207680632 0.5389064901558549 -0.8250174241334556 -0.3117320885632525
@@ -0,0 +1,11 @@
1
+ KLF4_f2.xml
2
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
5
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
6
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
7
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
8
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
9
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
10
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
11
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
@@ -0,0 +1,4 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct
3
+ SP1_f1 0.22754919499105544 -1 10 direct
4
+ GABPA_f1 0.00043527658136684877 -8 5 direct
@@ -0,0 +1,3 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct
3
+ SP1_f1 0.22754919499105544 -1 10 direct
@@ -0,0 +1,4 @@
1
+ #pwm similarity shift overlap orientation
2
+ KLF4_f2 1.0 0 10 direct *
3
+ SP1_f1 0.2420758234928527 -1 10 direct *
4
+ GABPA_f1 0.00043527658136684877 -8 5 direct
@@ -0,0 +1,12 @@
1
+ > SP1_f1
2
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
3
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
4
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
5
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
6
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
7
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
8
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
9
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
10
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
11
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
12
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
@@ -0,0 +1,12 @@
1
+ SP1_f1_revcomp
2
+ -0.6825482097865606 -0.28626775700282125 0.7641033353626651 -0.5166047365590577
3
+ -1.6080243648157357 1.163676006374752 -1.2251832285630033 -1.1896356092245055
4
+ -1.7780413702431377 1.126543157436868 -2.2510053061629707 -0.4450938582835542
5
+ -2.0846739035376483 1.338748858978805 -3.138426807809667 -4.015548413965584
6
+ -2.3243342594990253 1.3113096718240569 -2.9784027708801153 -1.9643526491643326
7
+ -0.851098348782244 -2.624685764808605 0.9449196547620103 -0.07473964149330914
8
+ -4.8899306705082335 1.3510439136452728 -3.719643631330185 -2.2369752892820087
9
+ -5.077972423609655 1.3684371349982631 -2.916163300253228 -4.563896055436894
10
+ -4.338927525031567 1.1807800242010371 -2.973985191321805 -0.387227623476054
11
+ -0.6138563775211981 1.0960627561110399 -2.1119259694238686 -1.0631255752097801
12
+ -1.1060188862599292 0.8657012535789861 -0.6748234046937317 -0.24435707885585334
@@ -0,0 +1,14 @@
1
+ GABPA_f1
2
+ -0.1106670158341858 0.013801606113892391 0.6054596108973699 -1.3518085041421573
3
+ 0.37030668921643345 0.15761121480429963 0.009069314183831202 -0.9888619717703562
4
+ 0.47526546359546684 -0.3011678534572083 0.4031522994412777 -1.8638752827041059
5
+ -1.5544255540164373 1.1082369687811506 -0.2814091552834454 -5.30708531823271
6
+ -0.6362037835776368 1.235338189985594 -3.5801322928552253 -5.717323067092849
7
+ -5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
8
+ -5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
9
+ 1.3835219739184708 -5.2341956006430985 -5.852906870733575 -5.852906870733575
10
+ 1.3756340514956562 -5.394962755562375 -5.394962755562375 -3.401117964959733
11
+ -1.2176198315414444 -3.109079898175411 1.2964067931472216 -5.717323067092849
12
+ -1.3716559438167257 -0.2761401935045069 -1.8504445165866068 1.0404320473626856
13
+ -0.5440863133031895 -0.48103682561971345 0.907381908447086 -1.1280642594012078
14
+ 0.10557340209290218 -0.01814819455289191 0.4381106695354074 -1.0304105539540915
@@ -0,0 +1,11 @@
1
+ KLF4_f2
2
+ 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
3
+ -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
4
+ -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
5
+ -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
6
+ -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
7
+ -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
8
+ -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
9
+ -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
10
+ -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
11
+ -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
@@ -0,0 +1,12 @@
1
+ > SP1_f1
2
+ -0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
3
+ -1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
4
+ -0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
5
+ -4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
6
+ -2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
7
+ -0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
8
+ -1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
9
+ -4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
10
+ -0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
11
+ -1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
12
+ -0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:PWM::Collection
2
+ pvalues:
3
+ - 0.0005
4
+ - 0.0001
5
+ - 5.0e-05
6
+ background:
7
+ - 1
8
+ - 1
9
+ - 1
10
+ - 1
11
+ precise_discretization: 10
12
+ rough_discretization: 1
13
+ pwms:
14
+ GABPA_f1: !ruby/object:PWM::SingleMatrix
15
+ matrix:
16
+ - - -0.1106670158341858
17
+ - 0.013801606113892391
18
+ - 0.6054596108973699
19
+ - -1.3518085041421573
20
+ - - 0.37030668921643345
21
+ - 0.15761121480429963
22
+ - 0.009069314183831202
23
+ - -0.9888619717703562
24
+ - - 0.47526546359546684
25
+ - -0.3011678534572083
26
+ - 0.4031522994412777
27
+ - -1.8638752827041059
28
+ - - -1.5544255540164373
29
+ - 1.1082369687811506
30
+ - -0.2814091552834454
31
+ - -5.30708531823271
32
+ - - -0.6362037835776368
33
+ - 1.235338189985594
34
+ - -3.5801322928552253
35
+ - -5.717323067092849
36
+ - - -5.852906870733575
37
+ - -5.852906870733575
38
+ - 1.3841383838057746
39
+ - -5.852906870733575
40
+ - - -5.852906870733575
41
+ - -5.852906870733575
42
+ - 1.3841383838057746
43
+ - -5.852906870733575
44
+ - - 1.3835219739184708
45
+ - -5.2341956006430985
46
+ - -5.852906870733575
47
+ - -5.852906870733575
48
+ - - 1.3756340514956562
49
+ - -5.394962755562375
50
+ - -5.394962755562375
51
+ - -3.401117964959733
52
+ - - -1.2176198315414444
53
+ - -3.109079898175411
54
+ - 1.2964067931472216
55
+ - -5.717323067092849
56
+ - - -1.3716559438167257
57
+ - -0.2761401935045069
58
+ - -1.8504445165866068
59
+ - 1.0404320473626856
60
+ - - -0.5440863133031895
61
+ - -0.48103682561971345
62
+ - 0.907381908447086
63
+ - -1.1280642594012078
64
+ - - 0.10557340209290218
65
+ - -0.01814819455289191
66
+ - 0.4381106695354074
67
+ - -1.0304105539540915
68
+ name: GABPA_f1
69
+ KLF4_f2: !ruby/object:PWM::SingleMatrix
70
+ matrix:
71
+ - - 0.30861857265872605
72
+ - -2.254321000121579
73
+ - 0.13505703522674192
74
+ - 0.3285194224375633
75
+ - - -1.227018967707036
76
+ - -4.814127713368663
77
+ - 1.3059890687390967
78
+ - -4.908681463544344
79
+ - - -2.443469374521196
80
+ - -4.648238485031404
81
+ - 1.3588686548279805
82
+ - -4.441801801188402
83
+ - - -2.7177827948276123
84
+ - -3.8073538975356565
85
+ - 1.356272809724262
86
+ - -3.504104725510225
87
+ - - -0.5563232977367343
88
+ - 0.5340697765121405
89
+ - -3.61417723090579
90
+ - 0.5270259776377405
91
+ - - -1.8687622060887386
92
+ - -4.381483976582316
93
+ - 1.337932245336098
94
+ - -3.815629658877517
95
+ - - -2.045671123823928
96
+ - -2.384975142213679
97
+ - 0.7198551207724355
98
+ - 0.5449254135616948
99
+ - - -1.373157530374372
100
+ - -3.0063112097748217
101
+ - 1.285188335493552
102
+ - -2.5026044231773543
103
+ - - -2.1030513122772208
104
+ - -1.8941348100402244
105
+ - 1.249265758393991
106
+ - -1.4284210948906104
107
+ - - -1.3277128628152939
108
+ - 0.8982415633049462
109
+ - -0.8080773665408135
110
+ - -0.18161647647456935
111
+ name: KLF4_f2
112
+ SP1_f1: !ruby/object:PWM::SingleMatrix
113
+ matrix:
114
+ - - -0.24435707885585334
115
+ - -0.6748234046937317
116
+ - 0.8657012535789861
117
+ - -1.1060188862599292
118
+ - - -1.0631255752097801
119
+ - -2.1119259694238686
120
+ - 1.0960627561110399
121
+ - -0.6138563775211981
122
+ - - -0.387227623476054
123
+ - -2.973985191321805
124
+ - 1.1807800242010371
125
+ - -4.338927525031567
126
+ - - -4.563896055436894
127
+ - -2.916163300253228
128
+ - 1.3684371349982631
129
+ - -5.077972423609655
130
+ - - -2.2369752892820087
131
+ - -3.719643631330185
132
+ - 1.3510439136452728
133
+ - -4.8899306705082335
134
+ - - -0.07473964149330914
135
+ - 0.9449196547620103
136
+ - -2.624685764808605
137
+ - -0.851098348782244
138
+ - - -1.9643526491643326
139
+ - -2.9784027708801153
140
+ - 1.3113096718240569
141
+ - -2.3243342594990253
142
+ - - -4.015548413965584
143
+ - -3.138426807809667
144
+ - 1.338748858978805
145
+ - -2.0846739035376483
146
+ - - -0.4450938582835542
147
+ - -2.2510053061629707
148
+ - 1.126543157436868
149
+ - -1.7780413702431377
150
+ - - -1.1896356092245055
151
+ - -1.2251832285630033
152
+ - 1.163676006374752
153
+ - -1.6080243648157357
154
+ - - -0.5166047365590577
155
+ - 0.7641033353626651
156
+ - -0.28626775700282125
157
+ - -0.6825482097865606
158
+ name: SP1_f1
159
+ infos:
160
+ GABPA_f1:
161
+ :rough:
162
+ 5.0e-05: 16.1
163
+ 0.0001: 15.1
164
+ 0.0005: 12.1
165
+ :precise:
166
+ 5.0e-05: 8.61
167
+ 0.0001: 7.609999999999999
168
+ 0.0005: 4.51
169
+ KLF4_f2:
170
+ :rough:
171
+ 5.0e-05: 14.1
172
+ 0.0001: 13.1
173
+ 0.0005: 11.1
174
+ :precise:
175
+ 5.0e-05: 8.51
176
+ 0.0001: 7.909999999999999
177
+ 0.0005: 5.8100000000000005
178
+ SP1_f1:
179
+ :rough:
180
+ 5.0e-05: 14.1
181
+ 0.0001: 14.1
182
+ 0.0005: 11.1
183
+ :precise:
184
+ 5.0e-05: 8.51
185
+ 0.0001: 7.709999999999999
186
+ 0.0005: 5.61
@@ -0,0 +1,125 @@
1
+ require 'test/unit'
2
+
3
# Shared helpers for the command-line tests below.
module Helpers
  # Runs the `find_pvalue` executable with +args+ appended to the command line
  # and returns the last whitespace-separated token of what it printed.
  def self.obtain_pvalue_by_threshold(args)
    output = IO.popen("find_pvalue #{args}") { |io| io.read }
    tokens = output.strip.split
    tokens.last
  end
end
8
+
9
# Runs the `find_threshold` executable and cross-checks the P-value it
# reports against what `find_pvalue` prints for the returned threshold.
class FindThresholdTest < Test::Unit::TestCase
  # Several P-values on one command line: one tab-separated output line per
  # P-value, reported in ascending order.
  def test_process_several_pvalues
    pvalues = []
    output = IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001 0.0005', &:read)
    output.each_line do |line|
      pvalue, threshold, real_pvalue = line.strip.split("\t")
      pvalues << pvalue
      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
    end
    # expected value goes first so that failure messages read correctly
    assert_equal ['0.0005', '0.001'], pvalues
  end

  def test_process_one_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001', &:read).strip.split("\t")
    assert_equal '0.001', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # Without -p the tool is expected to report P-value 0.0005.
  def test_process_default_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # The same -d value is passed to both tools so their results are comparable.
  def test_custom_discretization
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -d 100', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
  end
end
35
+
36
# Checks the exact text printed by the `find_pvalue` executable.
class FindPvalueTest < Test::Unit::TestCase
  def test_process_one_threshold
    printed = IO.popen('find_pvalue test/data/KLF4_f2.pat 4.1719', &:read)
    assert_equal "4.1719\t1048.0\t0.00099945068359375\n", printed
  end

  def test_process_several_thresholds
    printed = IO.popen('find_pvalue test/data/KLF4_f2.pat 4.1719 5.2403', &:read)
    assert_equal "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n", printed
  end

  # Output lines follow the order of the thresholds on the command line.
  def test_process_several_thresholds_result_is_ordered
    printed = IO.popen('find_pvalue test/data/KLF4_f2.pat 5.2403 4.1719', &:read)
    assert_equal "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n", printed
  end

  def test_custom_discretization
    printed = IO.popen('find_pvalue test/data/KLF4_f2.pat 5.2403 -d 100', &:read)
    assert_equal "5.2403\t527.0\t0.0005025863647460938\n", printed
  end
end
58
+
59
+
60
# Checks the exact text printed by the `eval_similarity` executable for
# several pairs of matrix files.
class TestEvalSimilarity < Test::Unit::TestCase
  def test_process_pair_of_pwms
    printed = IO.popen('eval_similarity test/data/KLF4_f2.pat test/data/SP1_f1.pat', &:read)
    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", printed
  end

  def test_process_another_pair_of_pwms
    printed = IO.popen('eval_similarity test/data/SP1_f1.pat test/data/AHR_si.pat', &:read)
    assert_equal "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n", printed
  end

  # A matrix compared with its reverse complement must be reported as
  # a 'revcomp' alignment with similarity 1.0.
  def test_recognize_orientation_of_alignment
    printed = IO.popen('eval_similarity test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat', &:read)
    assert_equal "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n", printed
  end

  def test_process_custom_discretization
    printed = IO.popen('eval_similarity test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1', &:read)
    assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n", printed
  end
end
84
+
85
# Checks the exact text printed by the `eval_alignment` executable for
# explicitly given shifts and orientations.
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
  def test_process_at_optimal_alignment
    printed = IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct ', &:read)
    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", printed
  end

  def test_process_not_optimal_alignment
    printed = IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct ', &:read)
    assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", printed
  end

  def test_process_at_optimal_alignment_reversed
    printed = IO.popen('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp ', &:read)
    assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", printed
  end
end
102
+
103
# Runs the `preprocess_collection` executable and compares the file it
# writes with the reference collection stored in test/data.
class TestPreprocessCollection < Test::Unit::TestCase
  def test_multipvalue_preproceessing
    tmp_path = 'test/data/test_collection.yaml.tmp'
    system('preprocess_collection ./test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
    assert_equal File.read('test/data/test_collection.yaml'), File.read(tmp_path)
  ensure
    # Remove the temporary file even when the assertion or the read fails,
    # so a failed run does not leave stale output behind.
    File.delete(tmp_path) if File.exist?(tmp_path)
  end
end
110
+
111
# Compares the output of the `scan_collection` executable with the reference
# result files stored in test/data.
class TestScanCollection < Test::Unit::TestCase
  def test_scan_default_cutoff
    expected = File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt')
    printed = IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --silent', &:read)
    assert_equal expected, printed
  end

  def test_scan_and_output_all_results
    expected = File.read('test/data/KLF4_f2_scan_results_all.txt')
    printed = IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent', &:read)
    assert_equal expected, printed
  end

  def test_scan_precise_mode
    expected = File.read('test/data/KLF4_f2_scan_results_precise_mode.txt')
    printed = IO.popen('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent', &:read)
    assert_equal expected, printed
  end
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: macroape
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.2.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ilya Vorontsov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-28 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
15
+ Estimation. It's a bioinformatic tool for evaluating similarity measure and best
16
+ alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
17
+ P-values and inside out and even searching a collection of motifs for the most similar
18
+ ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
19
+ email:
20
+ - prijutme4ty@gmail.com
21
+ executables:
22
+ - eval_alignment
23
+ - eval_similarity
24
+ - find_pvalue
25
+ - find_threshold
26
+ - preprocess_collection
27
+ - scan_collection
28
+ extensions: []
29
+ extra_rdoc_files: []
30
+ files:
31
+ - .gitignore
32
+ - Gemfile
33
+ - LICENSE
34
+ - README.md
35
+ - Rakefile
36
+ - bin/eval_alignment
37
+ - bin/eval_similarity
38
+ - bin/find_pvalue
39
+ - bin/find_threshold
40
+ - bin/preprocess_collection
41
+ - bin/scan_collection
42
+ - lib/macroape.rb
43
+ - lib/macroape/aligned_pair_intersection.rb
44
+ - lib/macroape/aligned_pair_metrics.rb
45
+ - lib/macroape/aligned_pair_transformations.rb
46
+ - lib/macroape/collection.rb
47
+ - lib/macroape/count_by_threshold.rb
48
+ - lib/macroape/exec/eval_alignment.rb
49
+ - lib/macroape/exec/eval_similarity.rb
50
+ - lib/macroape/exec/find_pvalue.rb
51
+ - lib/macroape/exec/find_threshold.rb
52
+ - lib/macroape/exec/preprocess_collection.rb
53
+ - lib/macroape/exec/scan_collection.rb
54
+ - lib/macroape/extract_pwm.rb
55
+ - lib/macroape/gauss_estimation.rb
56
+ - lib/macroape/matrix_information.rb
57
+ - lib/macroape/matrix_on_background.rb
58
+ - lib/macroape/matrix_transformations.rb
59
+ - lib/macroape/pair_metrics.rb
60
+ - lib/macroape/pair_transformations.rb
61
+ - lib/macroape/pwm_compare.rb
62
+ - lib/macroape/pwm_compare_aligned.rb
63
+ - lib/macroape/single_matrix.rb
64
+ - lib/macroape/support.rb
65
+ - lib/macroape/threshold_by_pvalue.rb
66
+ - lib/macroape/version.rb
67
+ - macroape.gemspec
68
+ - test/data/AHR_si.pat
69
+ - test/data/KLF4_f2.pat
70
+ - test/data/KLF4_f2_scan_results_all.txt
71
+ - test/data/KLF4_f2_scan_results_default_cutoff.txt
72
+ - test/data/KLF4_f2_scan_results_precise_mode.txt
73
+ - test/data/SP1_f1.pat
74
+ - test/data/SP1_f1_revcomp.pat
75
+ - test/data/test_collection.yaml
76
+ - test/data/test_collection/GABPA_f1.pat
77
+ - test/data/test_collection/KLF4_f2.pat
78
+ - test/data/test_collection/SP1_f1.pat
79
+ - test/macroape_test.rb
80
+ homepage: http://autosome.ru/macroape/
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 1.8.24
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: PWM comparison tool using MACROAPE approach
104
+ test_files:
105
+ - test/data/AHR_si.pat
106
+ - test/data/KLF4_f2.pat
107
+ - test/data/KLF4_f2_scan_results_all.txt
108
+ - test/data/KLF4_f2_scan_results_default_cutoff.txt
109
+ - test/data/KLF4_f2_scan_results_precise_mode.txt
110
+ - test/data/SP1_f1.pat
111
+ - test/data/SP1_f1_revcomp.pat
112
+ - test/data/test_collection.yaml
113
+ - test/data/test_collection/GABPA_f1.pat
114
+ - test/data/test_collection/KLF4_f2.pat
115
+ - test/data/test_collection/SP1_f1.pat
116
+ - test/macroape_test.rb