macroape 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +61 -0
- data/Rakefile +7 -0
- data/bin/eval_alignment +3 -0
- data/bin/eval_similarity +3 -0
- data/bin/find_pvalue +3 -0
- data/bin/find_threshold +3 -0
- data/bin/preprocess_collection +3 -0
- data/bin/scan_collection +3 -0
- data/lib/macroape/aligned_pair_intersection.rb +136 -0
- data/lib/macroape/aligned_pair_metrics.rb +24 -0
- data/lib/macroape/aligned_pair_transformations.rb +23 -0
- data/lib/macroape/collection.rb +15 -0
- data/lib/macroape/count_by_threshold.rb +34 -0
- data/lib/macroape/exec/eval_alignment.rb +141 -0
- data/lib/macroape/exec/eval_similarity.rb +107 -0
- data/lib/macroape/exec/find_pvalue.rb +80 -0
- data/lib/macroape/exec/find_threshold.rb +76 -0
- data/lib/macroape/exec/preprocess_collection.rb +94 -0
- data/lib/macroape/exec/scan_collection.rb +124 -0
- data/lib/macroape/extract_pwm.rb +32 -0
- data/lib/macroape/gauss_estimation.rb +30 -0
- data/lib/macroape/matrix_information.rb +29 -0
- data/lib/macroape/matrix_on_background.rb +16 -0
- data/lib/macroape/matrix_transformations.rb +29 -0
- data/lib/macroape/pair_metrics.rb +9 -0
- data/lib/macroape/pair_transformations.rb +28 -0
- data/lib/macroape/pwm_compare.rb +10 -0
- data/lib/macroape/pwm_compare_aligned.rb +13 -0
- data/lib/macroape/single_matrix.rb +45 -0
- data/lib/macroape/support.rb +34 -0
- data/lib/macroape/threshold_by_pvalue.rb +68 -0
- data/lib/macroape/version.rb +3 -0
- data/lib/macroape.rb +26 -0
- data/macroape.gemspec +17 -0
- data/test/data/AHR_si.pat +10 -0
- data/test/data/KLF4_f2.pat +11 -0
- data/test/data/KLF4_f2_scan_results_all.txt +4 -0
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +3 -0
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +4 -0
- data/test/data/SP1_f1.pat +12 -0
- data/test/data/SP1_f1_revcomp.pat +12 -0
- data/test/data/test_collection/GABPA_f1.pat +14 -0
- data/test/data/test_collection/KLF4_f2.pat +11 -0
- data/test/data/test_collection/SP1_f1.pat +12 -0
- data/test/data/test_collection.yaml +186 -0
- data/test/macroape_test.rb +125 -0
- metadata +116 -0
@@ -0,0 +1,68 @@
|
|
1
|
+
module PWM
  # Mixin that converts requested P-values into score thresholds.
  #
  # The including object must provide +length+, +matrix+, +probabilities+,
  # +sum_of_probabilities+, +number_of_words+, +best_score+, +best_suffix+ and
  # +threshold_gauss_estimation+; none of them is defined in this module.
  module ThresholdByPvalue
    # Returns the threshold found for a single P-value
    # (the second value yielded by #thresholds).
    def threshold(pvalue)
      thresholds(pvalue) { |_, thresh, _| return thresh }
    end

    # Yields +pvalue, threshold, real_pvalue+ for every requested P-value,
    # in ascending order of P-value.
    #
    # +threshold+ is the "left" threshold shifted up by 0.1 and +real_pvalue+
    # is the "right" count divided by +number_of_words+.
    # NOTE(review): the 0.1 shift presumably relies on neighbouring scores
    # differing by at least 1 (discretized matrix) - confirm.
    #
    # Without a block an Enumerator over the same triples is returned.
    def thresholds(*pvalues)
      return enum_for(:thresholds, *pvalues) unless block_given?

      thresholds_by_pvalues(*pvalues).each do |pvalue, (left_threshold, _left_count, _right_threshold, right_count)|
        threshold = left_threshold + 0.1
        real_pvalue = right_count.to_f / number_of_words
        yield pvalue, threshold, real_pvalue
      end
    end

    # ret-value: hash {pvalue => [left_threshold, left_count, right_threshold, right_count]}
    #
    # Scores are visited from the highest to the lowest while their counts are
    # accumulated. For each P-value the "left" pair is the first score (and the
    # accumulated count including it) at which the accumulated count exceeds
    # pvalue * sum_of_probabilities**length; the "right" pair is the previous
    # score and the count accumulated before it. When the highest score alone
    # already exceeds the target, the "right" pair is [best_score + 1.0, 0.0].
    # left_count is returned as accumulated (no Float coercion) unless the
    # highest score is the "left" one.
    #
    # Returns an empty hash when no P-values are given.
    def thresholds_by_pvalues(*pvalues)
      return {} if pvalues.empty?

      total_count = sum_of_probabilities**length
      max_pvalue = pvalues.max
      max_look_for_count = max_pvalue * total_count
      scores = {}
      until scores.values.inject(0) { |sum, count| sum + count } >= max_look_for_count
        scores = calculate_count_distribution_after_threshold(threshold_gauss_estimation(max_pvalue))
        max_pvalue *= 2 # if estimation counted too small amount of words - try to lower threshold estimation by doubling pvalue
      end

      # [pvalue, target count] pairs in ascending order of P-value (duplicates collapse).
      look_for_counts = pvalues.sort.each_with_object({}) { |pvalue, counts|
        counts[pvalue] = pvalue * total_count
      }.to_a

      sum_count = 0
      scores = scores.sort.reverse
      results = {}
      scores.each_with_index do |(score, count), i|
        # usually this 'while' works as 'if'
        while !look_for_counts.empty? && sum_count + count > look_for_counts.first[1]
          pval, _target_count = look_for_counts.shift
          if i > 0
            results[pval] = [score.to_f, sum_count + count, scores[i - 1][0].to_f, sum_count.to_f]
          else
            results[pval] = [score.to_f, (sum_count + count).to_f, best_score + 1.0, 0.0]
          end
        end
        sum_count += count
      end
      results
    end

    # Builds a hash {score => weighted count} column by column.
    #
    # A partial score is dropped as soon as adding best_suffix[column + 1] to
    # it cannot reach +threshold+. Every letter contributes the count of its
    # prefix multiplied by probabilities[letter].
    #
    # Raises RuntimeError when the constant MaxHashSize is defined and an
    # intermediate hash grows beyond it.
    def calculate_count_distribution_after_threshold(threshold)
      scores = { 0 => 1 }
      length.times do |column|
        new_scores = Hash.new(0)
        scores.each do |score, count|
          4.times do |letter|
            new_score = score + matrix[column][letter]
            next unless new_score + best_suffix[column + 1] >= threshold

            new_scores[new_score] += count * probabilities[letter]
          end
        end
        if defined?(MaxHashSize) && new_scores.size > MaxHashSize
          raise 'Hash overflow in PWM::ThresholdByPvalue#calculate_count_distribution_after_threshold'
        end
        scores = new_scores
      end
      scores
    end
  end
end
|
data/lib/macroape.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'macroape/version'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
require 'macroape/extract_pwm'
|
5
|
+
require 'macroape/support'
|
6
|
+
require 'macroape/matrix_transformations'
|
7
|
+
require 'macroape/matrix_information'
|
8
|
+
require 'macroape/gauss_estimation'
|
9
|
+
require 'macroape/threshold_by_pvalue'
|
10
|
+
require 'macroape/single_matrix'
|
11
|
+
require 'macroape/count_by_threshold'
|
12
|
+
require 'macroape/matrix_on_background'
|
13
|
+
|
14
|
+
require 'macroape/aligned_pair_transformations'
|
15
|
+
require 'macroape/aligned_pair_metrics'
|
16
|
+
require 'macroape/aligned_pair_intersection'
|
17
|
+
require 'macroape/pwm_compare_aligned'
|
18
|
+
|
19
|
+
require 'macroape/pair_transformations'
|
20
|
+
require 'macroape/pair_metrics'
|
21
|
+
require 'macroape/pwm_compare'
|
22
|
+
require 'macroape/collection'
|
23
|
+
|
24
|
+
module Macroape
|
25
|
+
# Top-level namespace of the gem; Macroape::VERSION is loaded from 'macroape/version'.
|
26
|
+
end
|
data/macroape.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/macroape/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for macroape; the version comes from Macroape::VERSION.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure and best alignment between a pair of Position Weight Matrices(PWM), finding thresholds by P-values and inside out and even searching a collection of motifs for the most similar ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw}
  gem.summary       = %q{PWM comparison tool using MACROAPE approach}
  gem.homepage      = "http://autosome.ru/macroape/"

  # Package the files tracked by git, one path per line of output.
  # $/ is the input record separator ("\n" by default). The output record
  # separator $\ is nil by default, which makes String#split fall back to
  # splitting on any whitespace and breaks paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  # Every tracked file under bin/ is installed as an executable.
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "macroape"
  gem.require_paths = ["lib"]
  gem.version       = Macroape::VERSION
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
AHR_si
|
2
|
+
0.049659785047588834 -0.7112292711652757 0.3721858143748157 0.007118345755728267
|
3
|
+
-1.1863517717347094 -1.107677589450009 -0.10122262270571995 0.9004092051347228
|
4
|
+
-0.5481419620395529 0.12723771473208506 -0.5930888656288792 0.5502420250441413
|
5
|
+
-2.3460850844035757 -2.7362137930742167 1.3086872088653387 -1.9795601218727432
|
6
|
+
-3.45212725495741 1.3387809407527818 -2.67026944276278 -2.46866856874775
|
7
|
+
-2.1349735467777395 -2.965000508388952 1.3322808172765488 -3.206089427067848
|
8
|
+
-3.45212725495741 -2.1349735467777395 -3.0087911229263207 1.335191259740944
|
9
|
+
-3.45212725495741 -3.45212725495741 1.3622489404156526 -3.45212725495741
|
10
|
+
0.10927007207680632 0.5389064901558549 -0.8250174241334556 -0.3117320885632525
|
@@ -0,0 +1,11 @@
|
|
1
|
+
KLF4_f2.xml
|
2
|
+
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
3
|
+
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
4
|
+
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
5
|
+
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
6
|
+
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
7
|
+
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
8
|
+
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
9
|
+
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
10
|
+
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
11
|
+
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
@@ -0,0 +1,12 @@
|
|
1
|
+
> SP1_f1
|
2
|
+
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
3
|
+
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
4
|
+
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
5
|
+
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
6
|
+
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
7
|
+
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
8
|
+
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
9
|
+
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
10
|
+
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
11
|
+
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
12
|
+
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
|
@@ -0,0 +1,12 @@
|
|
1
|
+
SP1_f1_revcomp
|
2
|
+
-0.6825482097865606 -0.28626775700282125 0.7641033353626651 -0.5166047365590577
|
3
|
+
-1.6080243648157357 1.163676006374752 -1.2251832285630033 -1.1896356092245055
|
4
|
+
-1.7780413702431377 1.126543157436868 -2.2510053061629707 -0.4450938582835542
|
5
|
+
-2.0846739035376483 1.338748858978805 -3.138426807809667 -4.015548413965584
|
6
|
+
-2.3243342594990253 1.3113096718240569 -2.9784027708801153 -1.9643526491643326
|
7
|
+
-0.851098348782244 -2.624685764808605 0.9449196547620103 -0.07473964149330914
|
8
|
+
-4.8899306705082335 1.3510439136452728 -3.719643631330185 -2.2369752892820087
|
9
|
+
-5.077972423609655 1.3684371349982631 -2.916163300253228 -4.563896055436894
|
10
|
+
-4.338927525031567 1.1807800242010371 -2.973985191321805 -0.387227623476054
|
11
|
+
-0.6138563775211981 1.0960627561110399 -2.1119259694238686 -1.0631255752097801
|
12
|
+
-1.1060188862599292 0.8657012535789861 -0.6748234046937317 -0.24435707885585334
|
@@ -0,0 +1,14 @@
|
|
1
|
+
GABPA_f1
|
2
|
+
-0.1106670158341858 0.013801606113892391 0.6054596108973699 -1.3518085041421573
|
3
|
+
0.37030668921643345 0.15761121480429963 0.009069314183831202 -0.9888619717703562
|
4
|
+
0.47526546359546684 -0.3011678534572083 0.4031522994412777 -1.8638752827041059
|
5
|
+
-1.5544255540164373 1.1082369687811506 -0.2814091552834454 -5.30708531823271
|
6
|
+
-0.6362037835776368 1.235338189985594 -3.5801322928552253 -5.717323067092849
|
7
|
+
-5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
|
8
|
+
-5.852906870733575 -5.852906870733575 1.3841383838057746 -5.852906870733575
|
9
|
+
1.3835219739184708 -5.2341956006430985 -5.852906870733575 -5.852906870733575
|
10
|
+
1.3756340514956562 -5.394962755562375 -5.394962755562375 -3.401117964959733
|
11
|
+
-1.2176198315414444 -3.109079898175411 1.2964067931472216 -5.717323067092849
|
12
|
+
-1.3716559438167257 -0.2761401935045069 -1.8504445165866068 1.0404320473626856
|
13
|
+
-0.5440863133031895 -0.48103682561971345 0.907381908447086 -1.1280642594012078
|
14
|
+
0.10557340209290218 -0.01814819455289191 0.4381106695354074 -1.0304105539540915
|
@@ -0,0 +1,11 @@
|
|
1
|
+
KLF4_f2
|
2
|
+
0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
3
|
+
-1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
4
|
+
-2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
5
|
+
-2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
6
|
+
-0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
7
|
+
-1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
8
|
+
-2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
9
|
+
-1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
10
|
+
-2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
11
|
+
-1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
@@ -0,0 +1,12 @@
|
|
1
|
+
> SP1_f1
|
2
|
+
-0.24435707885585334 -0.6748234046937317 0.8657012535789861 -1.1060188862599292
|
3
|
+
-1.0631255752097801 -2.1119259694238686 1.0960627561110399 -0.6138563775211981
|
4
|
+
-0.387227623476054 -2.973985191321805 1.1807800242010371 -4.338927525031567
|
5
|
+
-4.563896055436894 -2.916163300253228 1.3684371349982631 -5.077972423609655
|
6
|
+
-2.2369752892820087 -3.719643631330185 1.3510439136452728 -4.8899306705082335
|
7
|
+
-0.07473964149330914 0.9449196547620103 -2.624685764808605 -0.851098348782244
|
8
|
+
-1.9643526491643326 -2.9784027708801153 1.3113096718240569 -2.3243342594990253
|
9
|
+
-4.015548413965584 -3.138426807809667 1.338748858978805 -2.0846739035376483
|
10
|
+
-0.4450938582835542 -2.2510053061629707 1.126543157436868 -1.7780413702431377
|
11
|
+
-1.1896356092245055 -1.2251832285630033 1.163676006374752 -1.6080243648157357
|
12
|
+
-0.5166047365590577 0.7641033353626651 -0.28626775700282125 -0.6825482097865606
|
@@ -0,0 +1,186 @@
|
|
1
|
+
--- !ruby/object:PWM::Collection
|
2
|
+
pvalues:
|
3
|
+
- 0.0005
|
4
|
+
- 0.0001
|
5
|
+
- 5.0e-05
|
6
|
+
background:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 1
|
10
|
+
- 1
|
11
|
+
precise_discretization: 10
|
12
|
+
rough_discretization: 1
|
13
|
+
pwms:
|
14
|
+
GABPA_f1: !ruby/object:PWM::SingleMatrix
|
15
|
+
matrix:
|
16
|
+
- - -0.1106670158341858
|
17
|
+
- 0.013801606113892391
|
18
|
+
- 0.6054596108973699
|
19
|
+
- -1.3518085041421573
|
20
|
+
- - 0.37030668921643345
|
21
|
+
- 0.15761121480429963
|
22
|
+
- 0.009069314183831202
|
23
|
+
- -0.9888619717703562
|
24
|
+
- - 0.47526546359546684
|
25
|
+
- -0.3011678534572083
|
26
|
+
- 0.4031522994412777
|
27
|
+
- -1.8638752827041059
|
28
|
+
- - -1.5544255540164373
|
29
|
+
- 1.1082369687811506
|
30
|
+
- -0.2814091552834454
|
31
|
+
- -5.30708531823271
|
32
|
+
- - -0.6362037835776368
|
33
|
+
- 1.235338189985594
|
34
|
+
- -3.5801322928552253
|
35
|
+
- -5.717323067092849
|
36
|
+
- - -5.852906870733575
|
37
|
+
- -5.852906870733575
|
38
|
+
- 1.3841383838057746
|
39
|
+
- -5.852906870733575
|
40
|
+
- - -5.852906870733575
|
41
|
+
- -5.852906870733575
|
42
|
+
- 1.3841383838057746
|
43
|
+
- -5.852906870733575
|
44
|
+
- - 1.3835219739184708
|
45
|
+
- -5.2341956006430985
|
46
|
+
- -5.852906870733575
|
47
|
+
- -5.852906870733575
|
48
|
+
- - 1.3756340514956562
|
49
|
+
- -5.394962755562375
|
50
|
+
- -5.394962755562375
|
51
|
+
- -3.401117964959733
|
52
|
+
- - -1.2176198315414444
|
53
|
+
- -3.109079898175411
|
54
|
+
- 1.2964067931472216
|
55
|
+
- -5.717323067092849
|
56
|
+
- - -1.3716559438167257
|
57
|
+
- -0.2761401935045069
|
58
|
+
- -1.8504445165866068
|
59
|
+
- 1.0404320473626856
|
60
|
+
- - -0.5440863133031895
|
61
|
+
- -0.48103682561971345
|
62
|
+
- 0.907381908447086
|
63
|
+
- -1.1280642594012078
|
64
|
+
- - 0.10557340209290218
|
65
|
+
- -0.01814819455289191
|
66
|
+
- 0.4381106695354074
|
67
|
+
- -1.0304105539540915
|
68
|
+
name: GABPA_f1
|
69
|
+
KLF4_f2: !ruby/object:PWM::SingleMatrix
|
70
|
+
matrix:
|
71
|
+
- - 0.30861857265872605
|
72
|
+
- -2.254321000121579
|
73
|
+
- 0.13505703522674192
|
74
|
+
- 0.3285194224375633
|
75
|
+
- - -1.227018967707036
|
76
|
+
- -4.814127713368663
|
77
|
+
- 1.3059890687390967
|
78
|
+
- -4.908681463544344
|
79
|
+
- - -2.443469374521196
|
80
|
+
- -4.648238485031404
|
81
|
+
- 1.3588686548279805
|
82
|
+
- -4.441801801188402
|
83
|
+
- - -2.7177827948276123
|
84
|
+
- -3.8073538975356565
|
85
|
+
- 1.356272809724262
|
86
|
+
- -3.504104725510225
|
87
|
+
- - -0.5563232977367343
|
88
|
+
- 0.5340697765121405
|
89
|
+
- -3.61417723090579
|
90
|
+
- 0.5270259776377405
|
91
|
+
- - -1.8687622060887386
|
92
|
+
- -4.381483976582316
|
93
|
+
- 1.337932245336098
|
94
|
+
- -3.815629658877517
|
95
|
+
- - -2.045671123823928
|
96
|
+
- -2.384975142213679
|
97
|
+
- 0.7198551207724355
|
98
|
+
- 0.5449254135616948
|
99
|
+
- - -1.373157530374372
|
100
|
+
- -3.0063112097748217
|
101
|
+
- 1.285188335493552
|
102
|
+
- -2.5026044231773543
|
103
|
+
- - -2.1030513122772208
|
104
|
+
- -1.8941348100402244
|
105
|
+
- 1.249265758393991
|
106
|
+
- -1.4284210948906104
|
107
|
+
- - -1.3277128628152939
|
108
|
+
- 0.8982415633049462
|
109
|
+
- -0.8080773665408135
|
110
|
+
- -0.18161647647456935
|
111
|
+
name: KLF4_f2
|
112
|
+
SP1_f1: !ruby/object:PWM::SingleMatrix
|
113
|
+
matrix:
|
114
|
+
- - -0.24435707885585334
|
115
|
+
- -0.6748234046937317
|
116
|
+
- 0.8657012535789861
|
117
|
+
- -1.1060188862599292
|
118
|
+
- - -1.0631255752097801
|
119
|
+
- -2.1119259694238686
|
120
|
+
- 1.0960627561110399
|
121
|
+
- -0.6138563775211981
|
122
|
+
- - -0.387227623476054
|
123
|
+
- -2.973985191321805
|
124
|
+
- 1.1807800242010371
|
125
|
+
- -4.338927525031567
|
126
|
+
- - -4.563896055436894
|
127
|
+
- -2.916163300253228
|
128
|
+
- 1.3684371349982631
|
129
|
+
- -5.077972423609655
|
130
|
+
- - -2.2369752892820087
|
131
|
+
- -3.719643631330185
|
132
|
+
- 1.3510439136452728
|
133
|
+
- -4.8899306705082335
|
134
|
+
- - -0.07473964149330914
|
135
|
+
- 0.9449196547620103
|
136
|
+
- -2.624685764808605
|
137
|
+
- -0.851098348782244
|
138
|
+
- - -1.9643526491643326
|
139
|
+
- -2.9784027708801153
|
140
|
+
- 1.3113096718240569
|
141
|
+
- -2.3243342594990253
|
142
|
+
- - -4.015548413965584
|
143
|
+
- -3.138426807809667
|
144
|
+
- 1.338748858978805
|
145
|
+
- -2.0846739035376483
|
146
|
+
- - -0.4450938582835542
|
147
|
+
- -2.2510053061629707
|
148
|
+
- 1.126543157436868
|
149
|
+
- -1.7780413702431377
|
150
|
+
- - -1.1896356092245055
|
151
|
+
- -1.2251832285630033
|
152
|
+
- 1.163676006374752
|
153
|
+
- -1.6080243648157357
|
154
|
+
- - -0.5166047365590577
|
155
|
+
- 0.7641033353626651
|
156
|
+
- -0.28626775700282125
|
157
|
+
- -0.6825482097865606
|
158
|
+
name: SP1_f1
|
159
|
+
infos:
|
160
|
+
GABPA_f1:
|
161
|
+
:rough:
|
162
|
+
5.0e-05: 16.1
|
163
|
+
0.0001: 15.1
|
164
|
+
0.0005: 12.1
|
165
|
+
:precise:
|
166
|
+
5.0e-05: 8.61
|
167
|
+
0.0001: 7.609999999999999
|
168
|
+
0.0005: 4.51
|
169
|
+
KLF4_f2:
|
170
|
+
:rough:
|
171
|
+
5.0e-05: 14.1
|
172
|
+
0.0001: 13.1
|
173
|
+
0.0005: 11.1
|
174
|
+
:precise:
|
175
|
+
5.0e-05: 8.51
|
176
|
+
0.0001: 7.909999999999999
|
177
|
+
0.0005: 5.8100000000000005
|
178
|
+
SP1_f1:
|
179
|
+
:rough:
|
180
|
+
5.0e-05: 14.1
|
181
|
+
0.0001: 14.1
|
182
|
+
0.0005: 11.1
|
183
|
+
:precise:
|
184
|
+
5.0e-05: 8.51
|
185
|
+
0.0001: 7.709999999999999
|
186
|
+
0.0005: 5.61
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
# Shared helpers for the command-line tests.
module Helpers
  # Runs the `find_pvalue` executable with the given argument string and
  # returns the last whitespace-separated token of its output.
  def self.obtain_pvalue_by_threshold(args)
    output = IO.popen("find_pvalue #{args}") { |pipe| pipe.read }
    tokens = output.strip.split
    tokens.last
  end
end
|
8
|
+
|
9
|
+
# Checks the `find_threshold` executable: the reported real P-value for each
# threshold must agree with what `find_pvalue` reports for that threshold.
class FindThresholdTest < Test::Unit::TestCase
  # Several P-values: one output line per P-value, in ascending order.
  def test_process_several_pvalues
    pvalues = []
    IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001 0.0005', &:read).lines.each do |line|
      pvalue, threshold, real_pvalue = line.strip.split("\t")
      pvalues << pvalue
      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
    end
    # assert_equal takes (expected, actual): the literal is the expectation.
    assert_equal ['0.0005', '0.001'], pvalues
  end

  def test_process_one_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -p 0.001', &:read).strip.split("\t")
    assert_equal '0.001', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # With no -p option the tool is expected to report P-value 0.0005.
  def test_process_default_pvalue
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
  end

  # The same -d value is passed to both tools so their results are comparable.
  def test_custom_discretization
    pvalue, threshold, real_pvalue = IO.popen('find_threshold test/data/KLF4_f2.pat -d 100', &:read).strip.split("\t")
    assert_equal '0.0005', pvalue
    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
  end
end
|
35
|
+
|
36
|
+
# Checks the exact tab-separated output of the `find_pvalue` executable.
class FindPvalueTest < Test::Unit::TestCase
  def test_process_one_threshold
    assert_command_output "4.1719\t1048.0\t0.00099945068359375\n",
                          'find_pvalue test/data/KLF4_f2.pat 4.1719'
  end

  def test_process_several_thresholds
    assert_command_output "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n",
                          'find_pvalue test/data/KLF4_f2.pat 4.1719 5.2403'
  end

  # Output lines follow the order of the thresholds on the command line.
  def test_process_several_thresholds_result_is_ordered
    assert_command_output "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n",
                          'find_pvalue test/data/KLF4_f2.pat 5.2403 4.1719'
  end

  def test_custom_discretization
    assert_command_output "5.2403\t527.0\t0.0005025863647460938\n",
                          'find_pvalue test/data/KLF4_f2.pat 5.2403 -d 100'
  end

  private

  # Runs +command+ and asserts that everything it printed equals +expected+.
  def assert_command_output(expected, command)
    printed = IO.popen(command) { |pipe| pipe.read }
    assert_equal expected, printed
  end
end
|
58
|
+
|
59
|
+
|
60
|
+
# Checks the exact output of the `eval_similarity` executable for pairs of
# matrices from test/data.
class TestEvalSimilarity < Test::Unit::TestCase
  def test_process_pair_of_pwms
    assert_command_output "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n",
                          'eval_similarity test/data/KLF4_f2.pat test/data/SP1_f1.pat'
  end

  def test_process_another_pair_of_pwms
    assert_command_output "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n",
                          'eval_similarity test/data/SP1_f1.pat test/data/AHR_si.pat'
  end

  # A matrix compared with its own reverse complement must be reported as a
  # 'revcomp' alignment.
  def test_recognize_orientation_of_alignment
    assert_command_output "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n",
                          'eval_similarity test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat'
  end

  def test_process_custom_discretization
    assert_command_output "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
                          'eval_similarity test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1'
  end

  private

  # Runs +command+ and asserts that everything it printed equals +expected+.
  def assert_command_output(expected, command)
    printed = IO.popen(command) { |pipe| pipe.read }
    assert_equal expected, printed
  end
end
|
84
|
+
|
85
|
+
# Checks the exact output of the `eval_alignment` executable for explicitly
# given shifts and orientations.
class TestEvalAlignmentSimilarity < Test::Unit::TestCase
  def test_process_at_optimal_alignment
    assert_command_output "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n",
                          'eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct '
  end

  def test_process_not_optimal_alignment
    assert_command_output "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
                          'eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct '
  end

  def test_process_at_optimal_alignment_reversed
    assert_command_output "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n",
                          'eval_alignment test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp '
  end

  private

  # Runs +command+ and asserts that everything it printed equals +expected+.
  def assert_command_output(expected, command)
    printed = IO.popen(command) { |pipe| pipe.read }
    assert_equal expected, printed
  end
end
|
102
|
+
|
103
|
+
# Checks that `preprocess_collection` reproduces the stored reference
# collection file byte for byte.
class TestPreprocessCollection < Test::Unit::TestCase
  def test_multipvalue_preproceessing
    tmp_file = 'test/data/test_collection.yaml.tmp'
    system("preprocess_collection ./test/data/test_collection -o #{tmp_file} -p 0.0005 0.0001 0.00005 --silent")
    assert_equal File.read('test/data/test_collection.yaml'), File.read(tmp_file)
  ensure
    # Remove the temporary file even when the comparison fails, so a failed
    # run does not leave it behind in test/data.
    File.delete(tmp_file) if File.exist?(tmp_file)
  end
end
|
110
|
+
|
111
|
+
# Compares the output of the `scan_collection` executable with reference
# result files stored in test/data.
class TestScanCollection < Test::Unit::TestCase
  def test_scan_default_cutoff
    assert_output_matches_file 'test/data/KLF4_f2_scan_results_default_cutoff.txt',
                               'scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --silent'
  end

  def test_scan_and_output_all_results
    assert_output_matches_file 'test/data/KLF4_f2_scan_results_all.txt',
                               'scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent'
  end

  def test_scan_precise_mode
    assert_output_matches_file 'test/data/KLF4_f2_scan_results_precise_mode.txt',
                               'scan_collection test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent'
  end

  private

  # Reads the reference file first, then runs +command+ and asserts that its
  # output equals the file content.
  def assert_output_matches_file(reference_path, command)
    expected = File.read(reference_path)
    printed = IO.popen(command) { |pipe| pipe.read }
    assert_equal expected, printed
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: macroape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.2.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ilya Vorontsov
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-28 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
15
|
+
Estimation. It's a bioinformatic tool for evaluating similarity measure and best
|
16
|
+
alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
|
17
|
+
P-values and inside out and even searching a collection of motifs for the most similar
|
18
|
+
ones. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
19
|
+
email:
|
20
|
+
- prijutme4ty@gmail.com
|
21
|
+
executables:
|
22
|
+
- eval_alignment
|
23
|
+
- eval_similarity
|
24
|
+
- find_pvalue
|
25
|
+
- find_threshold
|
26
|
+
- preprocess_collection
|
27
|
+
- scan_collection
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- .gitignore
|
32
|
+
- Gemfile
|
33
|
+
- LICENSE
|
34
|
+
- README.md
|
35
|
+
- Rakefile
|
36
|
+
- bin/eval_alignment
|
37
|
+
- bin/eval_similarity
|
38
|
+
- bin/find_pvalue
|
39
|
+
- bin/find_threshold
|
40
|
+
- bin/preprocess_collection
|
41
|
+
- bin/scan_collection
|
42
|
+
- lib/macroape.rb
|
43
|
+
- lib/macroape/aligned_pair_intersection.rb
|
44
|
+
- lib/macroape/aligned_pair_metrics.rb
|
45
|
+
- lib/macroape/aligned_pair_transformations.rb
|
46
|
+
- lib/macroape/collection.rb
|
47
|
+
- lib/macroape/count_by_threshold.rb
|
48
|
+
- lib/macroape/exec/eval_alignment.rb
|
49
|
+
- lib/macroape/exec/eval_similarity.rb
|
50
|
+
- lib/macroape/exec/find_pvalue.rb
|
51
|
+
- lib/macroape/exec/find_threshold.rb
|
52
|
+
- lib/macroape/exec/preprocess_collection.rb
|
53
|
+
- lib/macroape/exec/scan_collection.rb
|
54
|
+
- lib/macroape/extract_pwm.rb
|
55
|
+
- lib/macroape/gauss_estimation.rb
|
56
|
+
- lib/macroape/matrix_information.rb
|
57
|
+
- lib/macroape/matrix_on_background.rb
|
58
|
+
- lib/macroape/matrix_transformations.rb
|
59
|
+
- lib/macroape/pair_metrics.rb
|
60
|
+
- lib/macroape/pair_transformations.rb
|
61
|
+
- lib/macroape/pwm_compare.rb
|
62
|
+
- lib/macroape/pwm_compare_aligned.rb
|
63
|
+
- lib/macroape/single_matrix.rb
|
64
|
+
- lib/macroape/support.rb
|
65
|
+
- lib/macroape/threshold_by_pvalue.rb
|
66
|
+
- lib/macroape/version.rb
|
67
|
+
- macroape.gemspec
|
68
|
+
- test/data/AHR_si.pat
|
69
|
+
- test/data/KLF4_f2.pat
|
70
|
+
- test/data/KLF4_f2_scan_results_all.txt
|
71
|
+
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
72
|
+
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
73
|
+
- test/data/SP1_f1.pat
|
74
|
+
- test/data/SP1_f1_revcomp.pat
|
75
|
+
- test/data/test_collection.yaml
|
76
|
+
- test/data/test_collection/GABPA_f1.pat
|
77
|
+
- test/data/test_collection/KLF4_f2.pat
|
78
|
+
- test/data/test_collection/SP1_f1.pat
|
79
|
+
- test/macroape_test.rb
|
80
|
+
homepage: http://autosome.ru/macroape/
|
81
|
+
licenses: []
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 1.8.24
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: PWM comparison tool using MACROAPE approach
|
104
|
+
test_files:
|
105
|
+
- test/data/AHR_si.pat
|
106
|
+
- test/data/KLF4_f2.pat
|
107
|
+
- test/data/KLF4_f2_scan_results_all.txt
|
108
|
+
- test/data/KLF4_f2_scan_results_default_cutoff.txt
|
109
|
+
- test/data/KLF4_f2_scan_results_precise_mode.txt
|
110
|
+
- test/data/SP1_f1.pat
|
111
|
+
- test/data/SP1_f1_revcomp.pat
|
112
|
+
- test/data/test_collection.yaml
|
113
|
+
- test/data/test_collection/GABPA_f1.pat
|
114
|
+
- test/data/test_collection/KLF4_f2.pat
|
115
|
+
- test/data/test_collection/SP1_f1.pat
|
116
|
+
- test/macroape_test.rb
|