macroape 3.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +61 -0
- data/Rakefile +7 -0
- data/bin/eval_alignment +3 -0
- data/bin/eval_similarity +3 -0
- data/bin/find_pvalue +3 -0
- data/bin/find_threshold +3 -0
- data/bin/preprocess_collection +3 -0
- data/bin/scan_collection +3 -0
- data/lib/macroape/aligned_pair_intersection.rb +136 -0
- data/lib/macroape/aligned_pair_metrics.rb +24 -0
- data/lib/macroape/aligned_pair_transformations.rb +23 -0
- data/lib/macroape/collection.rb +15 -0
- data/lib/macroape/count_by_threshold.rb +34 -0
- data/lib/macroape/exec/eval_alignment.rb +141 -0
- data/lib/macroape/exec/eval_similarity.rb +107 -0
- data/lib/macroape/exec/find_pvalue.rb +80 -0
- data/lib/macroape/exec/find_threshold.rb +76 -0
- data/lib/macroape/exec/preprocess_collection.rb +94 -0
- data/lib/macroape/exec/scan_collection.rb +124 -0
- data/lib/macroape/extract_pwm.rb +32 -0
- data/lib/macroape/gauss_estimation.rb +30 -0
- data/lib/macroape/matrix_information.rb +29 -0
- data/lib/macroape/matrix_on_background.rb +16 -0
- data/lib/macroape/matrix_transformations.rb +29 -0
- data/lib/macroape/pair_metrics.rb +9 -0
- data/lib/macroape/pair_transformations.rb +28 -0
- data/lib/macroape/pwm_compare.rb +10 -0
- data/lib/macroape/pwm_compare_aligned.rb +13 -0
- data/lib/macroape/single_matrix.rb +45 -0
- data/lib/macroape/support.rb +34 -0
- data/lib/macroape/threshold_by_pvalue.rb +68 -0
- data/lib/macroape/version.rb +3 -0
- data/lib/macroape.rb +26 -0
- data/macroape.gemspec +17 -0
- data/test/data/AHR_si.pat +10 -0
- data/test/data/KLF4_f2.pat +11 -0
- data/test/data/KLF4_f2_scan_results_all.txt +4 -0
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +3 -0
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +4 -0
- data/test/data/SP1_f1.pat +12 -0
- data/test/data/SP1_f1_revcomp.pat +12 -0
- data/test/data/test_collection/GABPA_f1.pat +14 -0
- data/test/data/test_collection/KLF4_f2.pat +11 -0
- data/test/data/test_collection/SP1_f1.pat +12 -0
- data/test/data/test_collection.yaml +186 -0
- data/test/macroape_test.rb +125 -0
- metadata +116 -0
@@ -0,0 +1,68 @@
|
|
1
|
+
module PWM
  # Mixin that converts P-values into score thresholds.
  #
  # The including object must respond to +matrix+, +length+, +probabilities+,
  # +sum_of_probabilities+, +number_of_words+, +best_score+, +best_suffix+ and
  # +threshold_gauss_estimation+; none of these is defined in this module.
  module ThresholdByPvalue
    # Returns the threshold found for a single P-value.
    #
    # The +return+ inside the block exits this method on the first yielded
    # triple. NOTE(review): if nothing is yielded, the hash returned by
    # #thresholds leaks out instead of a number - confirm whether callers can
    # hit that case.
    def threshold(pvalue)
      thresholds(pvalue) { |_, thresh, _| return thresh }
    end

    # Yields +pvalue, threshold, real_pvalue+ for every requested P-value that
    # #thresholds_by_pvalues resolved.
    #
    # +threshold+ is the left threshold shifted up by 0.1 and +real_pvalue+ is
    # the right-hand count divided by +number_of_words+.
    # NOTE(review): the 0.1 shift presumably relies on scores being discretized
    # to integers - confirm against the discretization code.
    def thresholds(*pvalues)
      thresholds_by_pvalues(*pvalues).each do |pvalue, (left_threshold, _left_count, _right_threshold, right_count)|
        shifted_threshold = left_threshold + 0.1
        real_pvalue = right_count.to_f / number_of_words
        yield pvalue, shifted_threshold, real_pvalue
      end
    end

    # Returns a hash {pvalue => [left_threshold, left_count, right_threshold, right_count]}.
    #
    # Scores are walked from the highest down while accumulating their counts.
    # For each P-value, +left_threshold+ is the first score at which the
    # accumulated count exceeds <tt>pvalue * sum_of_probabilities**length</tt>,
    # +left_count+ is the accumulated count including that score,
    # +right_threshold+ is the previous (higher) score, or
    # <tt>best_score + 1.0</tt> when there is none, and +right_count+ is the
    # count accumulated before that score.
    #
    # Returns an empty hash when no P-values are given.
    def thresholds_by_pvalues(*pvalues)
      # Without this guard +pvalues.max+ is nil and the multiplication below
      # fails with NoMethodError.
      return {} if pvalues.empty?

      total_count = sum_of_probabilities**length
      max_pvalue = pvalues.max
      max_look_for_count = max_pvalue * total_count

      scores = {}
      until scores.values.inject(0) { |sum, count| sum + count } >= max_look_for_count
        scores = calculate_count_distribution_after_threshold(threshold_gauss_estimation(max_pvalue))
        # The estimation covered too few words: double the P-value so that the
        # next estimated threshold is lower.
        max_pvalue *= 2
      end

      # [pvalue, target count] pairs, ascending by P-value.
      look_for_counts = pvalues.sort.each_with_object({}) do |pvalue, counts|
        counts[pvalue] = pvalue * total_count
      end.to_a

      sorted_scores = scores.sort.reverse
      results = {}
      sum_count = 0
      sorted_scores.each_with_index do |(score, count), i|
        sum_count_with_current = sum_count + count
        # Usually runs at most once per score; it repeats only when several
        # target counts are passed by the same score.
        while !look_for_counts.empty? && sum_count_with_current > look_for_counts.first[1]
          pval = look_for_counts.shift.first
          results[pval] =
            if i > 0
              [score.to_f, sum_count_with_current, sorted_scores[i - 1][0].to_f, sum_count.to_f]
            else
              [score.to_f, sum_count_with_current.to_f, best_score + 1.0, 0.0]
            end
        end
        sum_count = sum_count_with_current
      end
      results
    end

    # Returns a hash {score => count} built column by column over +matrix+.
    #
    # Each count is weighted by +probabilities[letter]+. A partial score is
    # dropped as soon as adding <tt>best_suffix[column + 1]</tt> to it stays
    # below +threshold+.
    #
    # Raises RuntimeError when the constant +MaxHashSize+ is defined and the
    # intermediate hash grows beyond it.
    def calculate_count_distribution_after_threshold(threshold)
      scores = { 0 => 1 }
      length.times do |column|
        new_scores = Hash.new(0)
        scores.each do |score, count|
          4.times do |letter|
            new_score = score + matrix[column][letter]
            if new_score + best_suffix[column + 1] >= threshold
              new_scores[new_score] += count * probabilities[letter]
            end
          end
        end
        if defined?(MaxHashSize) && new_scores.size > MaxHashSize
          raise 'Hash overflow in PWM::ThresholdByPvalue#calculate_count_distribution_after_threshold'
        end
        scores = new_scores
      end
      scores
    end
  end
end
|
data/lib/macroape.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'macroape/version'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
require 'macroape/extract_pwm'
|
5
|
+
require 'macroape/support'
|
6
|
+
require 'macroape/matrix_transformations'
|
7
|
+
require 'macroape/matrix_information'
|
8
|
+
require 'macroape/gauss_estimation'
|
9
|
+
require 'macroape/threshold_by_pvalue'
|
10
|
+
require 'macroape/single_matrix'
|
11
|
+
require 'macroape/count_by_threshold'
|
12
|
+
require 'macroape/matrix_on_background'
|
13
|
+
|
14
|
+
require 'macroape/aligned_pair_transformations'
|
15
|
+
require 'macroape/aligned_pair_metrics'
|
16
|
+
require 'macroape/aligned_pair_intersection'
|
17
|
+
require 'macroape/pwm_compare_aligned'
|
18
|
+
|
19
|
+
require 'macroape/pair_transformations'
|
20
|
+
require 'macroape/pair_metrics'
|
21
|
+
require 'macroape/pwm_compare'
|
22
|
+
require 'macroape/collection'
|
23
|
+
|
24
|
+
module Macroape
|
25
|
+
# Top-level namespace of the gem. Nothing is defined in this file; it only
# loads the library's components through the require statements above.
# The gemspec reads Macroape::VERSION (presumably defined in macroape/version).
|
26
|
+
end
|
data/macroape.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/macroape/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for macroape. The file lists are taken from git, so the
# gem has to be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure and best alignment between a pair of Position Weight Matrices(PWM), finding thresholds by P-values and inside out and even searching a collection of motifs for the most similar ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw}
  gem.summary       = %q{PWM comparison tool using MACROAPE approach}
  gem.homepage      = "http://autosome.ru/macroape/"

  # Split on newlines only. The previous separator `$\` is nil by default,
  # which makes String#split break on any whitespace and therefore mangles
  # file names that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  # Everything under bin/ is installed as an executable.
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "macroape"
  gem.require_paths = ["lib"]
  gem.version       = Macroape::VERSION
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
AHR_si
|
2
|
+
0.049659785047588834 -0.7112292711652757 0.3721858143748157 0.007118345755728267
|
3
|
+
-1.1863517717347094 -1.107677589450009 -0.10122262270571995 0.9004092051347228
|
4
|
+
-0.5481419620395529 0.12723771473208506 -0.5930888656288792 0.5502420250441413
|
5
|
+
-2.3460850844035757 -2.7362137930742167 1.3086872088653387 -1.9795601218727432
|
6
|
+
-3.45212725495741 1.3387809407527818 -2.67026944276278 -2.46866856874775
|
7
|
+
-2.1349735467777395 -2.965000508388952 1.3322808172765488 -3.206089427067848
|
8
|
+
-3.45212725495741 -2.1349735467777395 -3.0087911229263207 1.335191259740944
|
9
|
+
-3.45212725495741 -3.45212725495741 1.3622489404156526 -3.45212725495741
|
10
|
+
0.10927007207680632 0.5389064901558549 -0.8250174241334556 -0.3117320885632525
|
@@ -0,0 +1,11 @@
|
|
1
|
+
KLF4_f2.xml
|
2
|
+
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
3
|
+
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
4
|
+
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
5
|
+
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
6
|
+
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
7
|
+
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
8
|
+
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
9
|
+
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
10
|
+
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
11
|
+
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
@@ -0,0 +1,12 @@
|
|
1
|
+
> SP1_f1
|
2
|
+
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
3
|
+
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
4
|
+
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
5
|
+
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
6
|
+
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
7
|
+
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
8
|
+
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
9
|
+
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
10
|
+
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
11
|
+
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
12
|
+
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
|
@@ -0,0 +1,12 @@
|
|
1
|
+
SP1_f1_revcomp
|
2
|
+
-0.6825482097865606 -0.28626775700282125 0.7641033353626651 -0.5166047365590577
|
3
|
+
-1.6080243648157357 1.163676006374752 -1.2251832285630033 -1.1896356092245055
|
4
|
+
-1.7780413702431377 1.126543157436868 -2.2510053061629707 -0.4450938582835542
|
5
|
+
-2.0846739035376483 1.338748858978805 -3.138426807809667 -4.015548413965584
|
6
|
+
-2.3243342594990253 1.3113096718240569 -2.9784027708801153 -1.9643526491643326
|
7
|
+
-0.851098348782244 -2.624685764808605 0.9449196547620103 -0.07473964149330914
|
8
|
+
-4.8899306705082335 1.3510439136452728 -3.719643631330185 -2.2369752892820087
|
9
|
+
-5.077972423609655 1.3684371349982631 -2.916163300253228 -4.563896055436894
|
10
|
+
-4.338927525031567 1.1807800242010371 -2.973985191321805 -0.387227623476054
|
11
|
+
-0.6138563775211981 1.0960627561110399 -2.1119259694238686 -1.0631255752097801
|
12
|
+
-1.1060188862599292 0.8657012535789861 -0.6748234046937317 -0.24435707885585334
|
@@ -0,0 +1,14 @@
|
|
1
|
+
GABPA_f1
|
2
|
+
-0.1106670158341858 0.013801606113892391 0.6054596108973699 -1.3518085041421573
|
3
|
+
0.37030668921643345 0.15761121480429963 0.009069314183831202 -0.9888619717703562
|
4
|
+
0.47526546359546684 -0.3011678534572083 0.4031522994412777 -1.8638752827041059
|
5
|
+
-1.5544255540164373 1.1082369687811506 -0.2814091552834454 -5.30708531823271
|
6
|
+
-0.6362037835776368 1.235338189985594 -3.5801322928552253 -5.717323067092849
|
7
|
+
-5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
|
8
|
+
-5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
|
9
|
+
1.3835219739184708 -5.2341956006430985 -5.852906870733575 -5.852906870733575
|
10
|
+
1.3756340514956562 -5.394962755562375 -5.394962755562375 -3.401117964959733
|
11
|
+
-1.2176198315414444 -3.109079898175411 1.2964067931472216 -5.717323067092849
|
12
|
+
-1.3716559438167257 -0.2761401935045069 -1.8504445165866068 1.0404320473626856
|
13
|
+
-0.5440863133031895 -0.48103682561971345 0.907381908447086 -1.1280642594012078
|
14
|
+
0.10557340209290218 -0.01814819455289191 0.4381106695354074 -1.0304105539540915
|
@@ -0,0 +1,11 @@
|
|
1
|
+
KLF4_f2
|
2
|
+
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
3
|
+
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
4
|
+
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
5
|
+
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
6
|
+
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
7
|
+
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
8
|
+
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
9
|
+
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
10
|
+
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
11
|
+
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
@@ -0,0 +1,12 @@
|
|
1
|
+
> SP1_f1
|
2
|
+
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
3
|
+
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
4
|
+
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
5
|
+
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
6
|
+
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
7
|
+
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
8
|
+
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
9
|
+
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
10
|
+
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
11
|
+
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
12
|
+
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
|
@@ -0,0 +1,186 @@
|
|
1
|
+
--- !ruby/object:PWM::Collection
|
2
|
+
pvalues:
|
3
|
+
- 0.0005
|
4
|
+
- 0.0001
|
5
|
+
- 5.0e-05
|
6
|
+
background:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
- 1
|
11
|
+
precise_discretization: 10
|
12
|
+
rough_discretization: 1
|
13
|
+
pwms:
|
14
|
+
GABPA_f1: !ruby/object:PWM::SingleMatrix
|
15
|
+
matrix:
|
16
|
+
- - -0.1106670158341858
|
17
|
+
- 0.013801606113892391
|
18
|
+
- 0.6054596108973699
|
19
|
+
- -1.3518085041421573
|
20
|
+
- - 0.37030668921643345
|
21
|
+
- 0.15761121480429963
|
22
|
+
- 0.009069314183831202
|
23
|
+
- -0.9888619717703562
|
24
|
+
- - 0.47526546359546684
|
25
|
+
- -0.3011678534572083
|
26
|
+
- 0.4031522994412777
|
27
|
+
- -1.8638752827041059
|
28
|
+
- - -1.5544255540164373
|
29
|
+
- 1.1082369687811506
|
30
|
+
- -0.2814091552834454
|
31
|
+
- -5.30708531823271
|
32
|
+
- - -0.6362037835776368
|
33
|
+
- 1.235338189985594
|
34
|
+
- -3.5801322928552253
|
35
|
+
- -5.717323067092849
|
36
|
+
- - -5.852906870733575
|
37
|
+
- -5.852906870733575
|
38
|
+
- 1.3841383838057746
|
39
|
+
- -5.852906870733575
|
40
|
+
- - -5.852906870733575
|
41
|
+
- -5.852906870733575
|
42
|
+
- 1.3841383838057746
|
43
|
+
- -5.852906870733575
|
44
|
+
- - 1.3835219739184708
|
45
|
+
- -5.2341956006430985
|
46
|
+
- -5.852906870733575
|
47
|
+
- -5.852906870733575
|
48
|
+
- - 1.3756340514956562
|
49
|
+
- -5.394962755562375
|
50
|
+
- -5.394962755562375
|
51
|
+
- -3.401117964959733
|
52
|
+
- - -1.2176198315414444
|
53
|
+
- -3.109079898175411
|
54
|
+
- 1.2964067931472216
|
55
|
+
- -5.717323067092849
|
56
|
+
- - -1.3716559438167257
|
57
|
+
- -0.2761401935045069
|
58
|
+
- -1.8504445165866068
|
59
|
+
- 1.0404320473626856
|
60
|
+
- - -0.5440863133031895
|
61
|
+
- -0.48103682561971345
|
62
|
+
- 0.907381908447086
|
63
|
+
- -1.1280642594012078
|
64
|
+
- - 0.10557340209290218
|
65
|
+
- -0.01814819455289191
|
66
|
+
- 0.4381106695354074
|
67
|
+
- -1.0304105539540915
|
68
|
+
name: GABPA_f1
|
69
|
+
KLF4_f2: !ruby/object:PWM::SingleMatrix
|
70
|
+
matrix:
|
71
|
+
- - 0.30861857265872605
|
72
|
+
- -2.254321000121579
|
73
|
+
- 0.13505703522674192
|
74
|
+
- 0.3285194224375633
|
75
|
+
- - -1.227018967707036
|
76
|
+
- -4.814127713368663
|
77
|
+
- 1.3059890687390967
|
78
|
+
- -4.908681463544344
|
79
|
+
- - -2.443469374521196
|
80
|
+
- -4.648238485031404
|
81
|
+
- 1.3588686548279805
|
82
|
+
- -4.441801801188402
|
83
|
+
- - -2.7177827948276123
|
84
|
+
- -3.8073538975356565
|
85
|
+
- 1.356272809724262
|
86
|
+
- -3.504104725510225
|
87
|
+
- - -0.5563232977367343
|
88
|
+
- 0.5340697765121405
|
89
|
+
- -3.61417723090579
|
90
|
+
- 0.5270259776377405
|
91
|
+
- - -1.8687622060887386
|
92
|
+
- -4.381483976582316
|
93
|
+
- 1.337932245336098
|
94
|
+
- -3.815629658877517
|
95
|
+
- - -2.045671123823928
|
96
|
+
- -2.384975142213679
|
97
|
+
- 0.7198551207724355
|
98
|
+
- 0.5449254135616948
|
99
|
+
- - -1.373157530374372
|
100
|
+
- -3.0063112097748217
|
101
|
+
- 1.285188335493552
|
102
|
+
- -2.5026044231773543
|
103
|
+
- - -2.1030513122772208
|
104
|
+
- -1.8941348100402244
|
105
|
+
- 1.249265758393991
|
106
|
+
- -1.4284210948906104
|
107
|
+
- - -1.3277128628152939
|
108
|
+
- 0.8982415633049462
|
109
|
+
- -0.8080773665408135
|
110
|
+
- -0.18161647647456935
|
111
|
+
name: KLF4_f2
|
112
|
+
SP1_f1: !ruby/object:PWM::SingleMatrix
|
113
|
+
matrix:
|
114
|
+
- - -0.24435707885585334
|
115
|
+
- -0.6748234046937317
|
116
|
+
- 0.8657012535789861
|
117
|
+
- -1.1060188862599292
|
118
|
+
- - -1.0631255752097801
|
119
|
+
- -2.1119259694238686
|
120
|
+
- 1.0960627561110399
|
121
|
+
- -0.6138563775211981
|
122
|
+
- - -0.387227623476054
|
123
|
+
- -2.973985191321805
|
124
|
+
- 1.1807800242010371
|
125
|
+
- -4.338927525031567
|
126
|
+
- - -4.563896055436894
|
127
|
+
- -2.916163300253228
|
128
|
+
- 1.3684371349982631
|
129
|
+
- -5.077972423609655
|
130
|
+
- - -2.2369752892820087
|
131
|
+
- -3.719643631330185
|
132
|
+
- 1.3510439136452728
|
133
|
+
- -4.8899306705082335
|
134
|
+
- - -0.07473964149330914
|
135
|
+
- 0.9449196547620103
|
136
|
+
- -2.624685764808605
|
137
|
+
- -0.851098348782244
|
138
|
+
- - -1.9643526491643326
|
139
|
+
- -2.9784027708801153
|
140
|
+
- 1.3113096718240569
|
141
|
+
- -2.3243342594990253
|
142
|
+
- - -4.015548413965584
|
143
|
+
- -3.138426807809667
|
144
|
+
- 1.338748858978805
|
145
|
+
- -2.0846739035376483
|
146
|
+
- - -0.4450938582835542
|
147
|
+
- -2.2510053061629707
|
148
|
+
- 1.126543157436868
|
149
|
+
- -1.7780413702431377
|
150
|
+
- - -1.1896356092245055
|
151
|
+
- -1.2251832285630033
|
152
|
+
- 1.163676006374752
|
153
|
+
- -1.6080243648157357
|
154
|
+
- - -0.5166047365590577
|
155
|
+
- 0.7641033353626651
|
156
|
+
- -0.28626775700282125
|
157
|
+
- -0.6825482097865606
|
158
|
+
name: SP1_f1
|
159
|
+
infos:
|
160
|
+
GABPA_f1:
|
161
|
+
:rough:
|
162
|
+
5.0e-05: 16.1
|
163
|
+
0.0001: 15.1
|
164
|
+
0.0005: 12.1
|
165
|
+
:precise:
|
166
|
+
5.0e-05: 8.61
|
167
|
+
0.0001: 7.609999999999999
|
168
|
+
0.0005: 4.51
|
169
|
+
KLF4_f2:
|
170
|
+
:rough:
|
171
|
+
5.0e-05: 14.1
|
172
|
+
0.0001: 13.1
|
173
|
+
0.0005: 11.1
|
174
|
+
:precise:
|
175
|
+
5.0e-05: 8.51
|
176
|
+
0.0001: 7.909999999999999
|
177
|
+
0.0005: 5.8100000000000005
|
178
|
+
SP1_f1:
|
179
|
+
:rough:
|
180
|
+
5.0e-05: 14.1
|
181
|
+
0.0001: 14.1
|
182
|
+
0.0005: 11.1
|
183
|
+
:precise:
|
184
|
+
5.0e-05: 8.51
|
185
|
+
0.0001: 7.709999999999999
|
186
|
+
0.0005: 5.61
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
# Shared helpers for the command-line integration tests.
module Helpers
  class << self
    # Runs the `find_pvalue` executable with the given argument string and
    # returns the last whitespace-separated token of its output.
    def obtain_pvalue_by_threshold(args)
      output = IO.popen("find_pvalue #{args}") { |pipe| pipe.read }
      output.strip.split.last
    end
  end
end
|
8
|
+
|
9
|
+
# Integration tests for the `find_threshold` executable. Each reported
# P-value is cross-checked against the output of `find_pvalue` run on the
# reported threshold (see Helpers.obtain_pvalue_by_threshold).
class FindThresholdTest < Test::Unit::TestCase
  # Several P-values are given at once; one tab-separated line is expected
  # per P-value, in ascending P-value order.
  def test_process_several_pvalues
    pvalues = []
    IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001 0.0005', &:read).each_line do |line|
      pvalue, threshold, real_pvalue = line.strip.split("\t")
      pvalues << pvalue
      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
    end
    # The expected value goes first so a failure message labels it correctly.
    assert_equal ['0.0005', '0.001'], pvalues
  end

  def test_process_one_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001', &:read).strip.split("\t")
    assert_equal '0.001', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # Without -p the tool is expected to report a P-value of 0.0005.
  def test_process_default_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # The same -d value is passed to both tools so their results stay comparable.
  def test_custom_discretization
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -d 100', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
  end
end
|
35
|
+
|
36
|
+
# Integration tests for the `find_pvalue` executable: the complete
# tab-separated output is compared with a recorded reference.
class FindPvalueTest < Test::Unit::TestCase
  def test_process_one_threshold
    expected = "4.1719\t1048.0\t0.00099945068359375\n"
    assert_equal expected, output_of('find_pvalue test/data/KLF4_f2.pat 4.1719')
  end

  def test_process_several_thresholds
    expected = "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n"
    assert_equal expected, output_of('find_pvalue test/data/KLF4_f2.pat 4.1719 5.2403')
  end

  # Output lines must follow the order of the thresholds on the command line.
  def test_process_several_thresholds_result_is_ordered
    expected = "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n"
    assert_equal expected, output_of('find_pvalue test/data/KLF4_f2.pat 5.2403 4.1719')
  end

  def test_custom_discretization
    expected = "5.2403\t527.0\t0.0005025863647460938\n"
    assert_equal expected, output_of('find_pvalue test/data/KLF4_f2.pat 5.2403 -d 100')
  end

  private

  # Runs +command+ and returns everything it wrote to standard output.
  def output_of(command)
    IO.popen(command) { |pipe| pipe.read }
  end
end
|
58
|
+
|
59
|
+
|
60
|
+
# Integration tests for the `eval_similarity` executable: the complete output
# is compared with a recorded reference.
class TestEvalSimilarity < Test::Unit::TestCase
  def test_process_pair_of_pwms
    expected = "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n"
    assert_equal expected, output_of('eval_similarity test/data/KLF4_f2.pat test/data/SP1_f1.pat')
  end

  def test_process_another_pair_of_pwms
    expected = "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n"
    assert_equal expected, output_of('eval_similarity test/data/SP1_f1.pat test/data/AHR_si.pat')
  end

  # A matrix compared with its own reverse complement must be reported as a
  # `revcomp` alignment.
  def test_recognize_orientation_of_alignment
    expected = "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n"
    assert_equal expected, output_of('eval_similarity test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat')
  end

  def test_process_custom_discretization
    expected = "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n"
    assert_equal expected, output_of('eval_similarity test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1')
  end

  private

  # Runs +command+ and returns everything it wrote to standard output.
  def output_of(command)
    IO.popen(command) { |pipe| pipe.read }
  end
end
|
84
|
+
|
85
|
+
# Integration tests for the `eval_alignment` executable, which is given an
# explicit shift and orientation on the command line.
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
  def test_process_at_optimal_alignment
    expected = "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n"
    assert_equal expected, output_of('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct ')
  end

  def test_process_not_optimal_alignment
    expected = "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n"
    assert_equal expected, output_of('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct ')
  end

  def test_process_at_optimal_alignment_reversed
    expected = "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n"
    assert_equal expected, output_of('eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp ')
  end

  private

  # Runs +command+ and returns everything it wrote to standard output.
  def output_of(command)
    IO.popen(command) { |pipe| pipe.read }
  end
end
|
102
|
+
|
103
|
+
# Integration test for the `preprocess_collection` executable: the generated
# YAML must be identical to the reference file kept in test/data.
class TestPreprocessCollection < Test::Unit::TestCase
  def test_multipvalue_preprocessing
    output = 'test/data/test_collection.yaml.tmp'
    system("preprocess_collection ./test/data/test_collection -o #{output} -p 0.0005 0.0001 0.00005 --silent")
    assert_equal File.read('test/data/test_collection.yaml'), File.read(output)
  ensure
    # Remove the temporary file even when the assertion fails or reading
    # raises, so a failed run leaves nothing behind.
    File.delete(output) if output && File.exist?(output)
  end
end
|
110
|
+
|
111
|
+
# Integration tests for the `scan_collection` executable: its output is
# compared with reference result files kept in test/data.
class TestScanCollection < Test::Unit::TestCase
  def test_scan_default_cutoff
    expected = File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt')
    actual = output_of('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --silent')
    assert_equal expected, actual
  end

  def test_scan_and_output_all_results
    expected = File.read('test/data/KLF4_f2_scan_results_all.txt')
    actual = output_of('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent')
    assert_equal expected, actual
  end

  def test_scan_precise_mode
    expected = File.read('test/data/KLF4_f2_scan_results_precise_mode.txt')
    actual = output_of('scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent')
    assert_equal expected, actual
  end

  private

  # Runs +command+ and returns everything it wrote to standard output.
  def output_of(command)
    IO.popen(command) { |pipe| pipe.read }
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: macroape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.2.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ilya Vorontsov
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-28 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
15
|
+
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
16
|
+
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|
17
|
+
P-values and inside out and even searching a collection of motifs for the most similar
|
18
|
+
ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
19
|
+
email:
|
20
|
+
- prijutme4ty@gmail.com
|
21
|
+
executables:
|
22
|
+
- eval_alignment
|
23
|
+
- eval_similarity
|
24
|
+
- find_pvalue
|
25
|
+
- find_threshold
|
26
|
+
- preprocess_collection
|
27
|
+
- scan_collection
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- .gitignore
|
32
|
+
- Gemfile
|
33
|
+
- LICENSE
|
34
|
+
- README.md
|
35
|
+
- Rakefile
|
36
|
+
- bin/eval_alignment
|
37
|
+
- bin/eval_similarity
|
38
|
+
- bin/find_pvalue
|
39
|
+
- bin/find_threshold
|
40
|
+
- bin/preprocess_collection
|
41
|
+
- bin/scan_collection
|
42
|
+
- lib/macroape.rb
|
43
|
+
- lib/macroape/aligned_pair_intersection.rb
|
44
|
+
- lib/macroape/aligned_pair_metrics.rb
|
45
|
+
- lib/macroape/aligned_pair_transformations.rb
|
46
|
+
- lib/macroape/collection.rb
|
47
|
+
- lib/macroape/count_by_threshold.rb
|
48
|
+
- lib/macroape/exec/eval_alignment.rb
|
49
|
+
- lib/macroape/exec/eval_similarity.rb
|
50
|
+
- lib/macroape/exec/find_pvalue.rb
|
51
|
+
- lib/macroape/exec/find_threshold.rb
|
52
|
+
- lib/macroape/exec/preprocess_collection.rb
|
53
|
+
- lib/macroape/exec/scan_collection.rb
|
54
|
+
- lib/macroape/extract_pwm.rb
|
55
|
+
- lib/macroape/gauss_estimation.rb
|
56
|
+
- lib/macroape/matrix_information.rb
|
57
|
+
- lib/macroape/matrix_on_background.rb
|
58
|
+
- lib/macroape/matrix_transformations.rb
|
59
|
+
- lib/macroape/pair_metrics.rb
|
60
|
+
- lib/macroape/pair_transformations.rb
|
61
|
+
- lib/macroape/pwm_compare.rb
|
62
|
+
- lib/macroape/pwm_compare_aligned.rb
|
63
|
+
- lib/macroape/single_matrix.rb
|
64
|
+
- lib/macroape/support.rb
|
65
|
+
- lib/macroape/threshold_by_pvalue.rb
|
66
|
+
- lib/macroape/version.rb
|
67
|
+
- macroape.gemspec
|
68
|
+
- test/data/AHR_si.pat
|
69
|
+
- test/data/KLF4_f2.pat
|
70
|
+
- test/data/KLF4_f2_scan_results_all.txt
|
71
|
+
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
72
|
+
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
73
|
+
- test/data/SP1_f1.pat
|
74
|
+
- test/data/SP1_f1_revcomp.pat
|
75
|
+
- test/data/test_collection.yaml
|
76
|
+
- test/data/test_collection/GABPA_f1.pat
|
77
|
+
- test/data/test_collection/KLF4_f2.pat
|
78
|
+
- test/data/test_collection/SP1_f1.pat
|
79
|
+
- test/macroape_test.rb
|
80
|
+
homepage: http://autosome.ru/macroape/
|
81
|
+
licenses: []
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 1.8.24
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: PWM comparison tool using MACROAPE approach
|
104
|
+
test_files:
|
105
|
+
- test/data/AHR_si.pat
|
106
|
+
- test/data/KLF4_f2.pat
|
107
|
+
- test/data/KLF4_f2_scan_results_all.txt
|
108
|
+
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
109
|
+
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
110
|
+
- test/data/SP1_f1.pat
|
111
|
+
- test/data/SP1_f1_revcomp.pat
|
112
|
+
- test/data/test_collection.yaml
|
113
|
+
- test/data/test_collection/GABPA_f1.pat
|
114
|
+
- test/data/test_collection/KLF4_f2.pat
|
115
|
+
- test/data/test_collection/SP1_f1.pat
|
116
|
+
- test/macroape_test.rb
|