macroape 4.0.1 → 4.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/TODO.txt +7 -0
- data/lib/macroape/cli/align_motifs.rb +2 -2
- data/lib/macroape/cli/eval_alignment.rb +2 -2
- data/lib/macroape/cli/eval_similarity.rb +2 -2
- data/lib/macroape/cli/find_pvalue.rb +1 -1
- data/lib/macroape/cli/find_threshold.rb +1 -1
- data/lib/macroape/cli/preprocess_collection.rb +1 -0
- data/lib/macroape/cli/scan_collection.rb +1 -1
- data/lib/macroape/version.rb +1 -1
- data/spec/count_distribution_spec.rb +59 -1
- data/spec/spec_helper.rb +8 -1
- metadata +8 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe425b463f7ed42c64027a15d68ee85a38334058
|
4
|
+
data.tar.gz: f9789653cf1708f00eb9255a24c4c009c352f3a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a09caa08fe447ef672ad91793151f0d1a2fef245141e86f5f54e467d881e32863a0e76502e7345297d702c9115d1b721e365a1c6e314e5cd5b55920e0f04a585
|
7
|
+
data.tar.gz: fcdf7a5724d1a8ad1d589a5d1d9ec892d780df3d05234af09e6ee546a486a95f4c833317ec2392fae65c55dd406b30d7e71e0eba9c09833edd86c4d489817679
|
data/TODO.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
Why does Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on Java work only for p-values less than or equal to 0.5, while 0.55 or 0.8 hangs the program (it never terminates)? My hypothesis is that the Gaussian threshold estimation fails.
|
2
|
+
|
3
|
+
|
4
|
+
Add a --same-strand mode that disallows the reverse-complement (revcomp) transformation of a motif, so that it becomes possible to compare RNA PWMs.
|
5
|
+
|
6
|
+
Fix align_motifs so that options (keys) can follow the positional arguments (in other words, use OptionParser).
|
7
|
+
|
1
8
|
ToDo:
|
2
9
|
6)
|
3
10
|
# TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
|
@@ -77,11 +77,11 @@ module Macroape
|
|
77
77
|
|
78
78
|
shifts = []
|
79
79
|
shifts << [leader_pwm_file, 0, :direct]
|
80
|
-
pwm_first = data_model.new(File.read(leader_pwm_file)).to_pwm
|
80
|
+
pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
|
81
81
|
pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
|
82
82
|
|
83
83
|
rest_pwm_files.each do |motif_name|
|
84
|
-
pwm_second = data_model.new(File.read(motif_name)).to_pwm
|
84
|
+
pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
|
85
85
|
pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
|
86
86
|
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
|
87
87
|
info = cmp.jaccard_by_pvalue(pvalue)
|
@@ -101,7 +101,7 @@ module Macroape
|
|
101
101
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
102
102
|
input_first = File.read(first_file)
|
103
103
|
end
|
104
|
-
pwm_first = data_model.new(input_first).to_pwm
|
104
|
+
pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
|
105
105
|
|
106
106
|
if second_file == '.stdin'
|
107
107
|
input_second = parser.parse
|
@@ -109,7 +109,7 @@ module Macroape
|
|
109
109
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
110
110
|
input_second = File.read(second_file)
|
111
111
|
end
|
112
|
-
pwm_second = data_model.new(input_second).to_pwm
|
112
|
+
pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
|
113
113
|
|
114
114
|
pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
|
115
115
|
pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
|
@@ -81,7 +81,7 @@ module Macroape
|
|
81
81
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
82
82
|
input_first = File.read(first_file)
|
83
83
|
end
|
84
|
-
pwm_first = data_model.new(input_first).to_pwm
|
84
|
+
pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
|
85
85
|
|
86
86
|
if second_file == '.stdin'
|
87
87
|
input_second = parser.parse
|
@@ -89,7 +89,7 @@ module Macroape
|
|
89
89
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
90
90
|
input_second = File.read(second_file)
|
91
91
|
end
|
92
|
-
pwm_second = data_model.new(input_second).to_pwm
|
92
|
+
pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
|
93
93
|
|
94
94
|
pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
|
95
95
|
pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
|
@@ -62,7 +62,7 @@ module Macroape
|
|
62
62
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
63
63
|
input = File.read(filename)
|
64
64
|
end
|
65
|
-
pwm = data_model.new(input).to_pwm
|
65
|
+
pwm = data_model.new(input).set_parameters(background: background).to_pwm
|
66
66
|
pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
|
67
67
|
|
68
68
|
counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
@@ -68,7 +68,7 @@ module Macroape
|
|
68
68
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
69
69
|
input = File.read(filename)
|
70
70
|
end
|
71
|
-
pwm = data_model.new(input).to_pwm
|
71
|
+
pwm = data_model.new(input).set_parameters(background: background).to_pwm
|
72
72
|
pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
|
73
73
|
|
74
74
|
infos = []
|
@@ -94,7 +94,7 @@ module Macroape
|
|
94
94
|
query_input = File.read(filename)
|
95
95
|
end
|
96
96
|
|
97
|
-
query_pwm = data_model.new(query_input).to_pwm
|
97
|
+
query_pwm = data_model.new(query_input).set_parameters(background: query_background).to_pwm
|
98
98
|
query_pwm.set_parameters(background: query_background, max_hash_size: max_hash_size)
|
99
99
|
|
100
100
|
query_pwm_rough = query_pwm.discrete(rough_discretization)
|
data/lib/macroape/version.rb
CHANGED
@@ -6,6 +6,9 @@ describe Bioinform::PWM do
|
|
6
6
|
let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
|
7
7
|
let :pwm_first do Bioinform::PWM.new(matrix_first) end
|
8
8
|
let :pwm_second do Bioinform::PWM.new(matrix_second) end
|
9
|
+
let :background do [0.1,0.4,0.4,0.1] end
|
10
|
+
let :pwm_first_on_background do pwm_first.tap{|pwm| pwm.set_parameters(background: background)} end
|
11
|
+
let :pwm_second_on_background do pwm_second.tap{|pwm| pwm.set_parameters(background: background)} end
|
9
12
|
|
10
13
|
context '#count_distribution_after_threshold' do
|
11
14
|
|
@@ -21,6 +24,19 @@ describe Bioinform::PWM do
|
|
21
24
|
distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
22
25
|
end
|
23
26
|
|
27
|
+
it 'for PWMs on different background it should contain the same scores (keys of hash)' do
|
28
|
+
pwm_first.count_distribution_after_threshold(0).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(0).keys.sort
|
29
|
+
pwm_first.count_distribution_after_threshold(13).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(13).keys.sort
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should return hash of score => count for all scores >= threshold when calculated on background' do
|
33
|
+
distribution_second = pwm_second_on_background.count_distribution_after_threshold(0)
|
34
|
+
distribution_second.should have_nearly_the_same_values({ 3=>0.01, 4=>0.08, 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
|
35
|
+
|
36
|
+
distribution_second = pwm_second_on_background.count_distribution_after_threshold(5)
|
37
|
+
distribution_second.should have_nearly_the_same_values({ 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
|
38
|
+
end
|
39
|
+
|
24
40
|
it 'should use existing precalculated hash @count_distribution if it exists' do
|
25
41
|
pwm = pwm_second;
|
26
42
|
pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
@@ -48,4 +64,46 @@ describe Bioinform::PWM do
|
|
48
64
|
end
|
49
65
|
end
|
50
66
|
|
51
|
-
|
67
|
+
context '#pvalue_by_threshold' do
|
68
|
+
it 'should return probability to be >= than threshold' do
|
69
|
+
pwm_second.pvalue_by_threshold(7).should be_within(1e-7).of(6.0/16)
|
70
|
+
end
|
71
|
+
it 'should return probability to be >= than threshold when calculated on background' do
|
72
|
+
pwm_second_on_background.pvalue_by_threshold(7).should be_within(1e-7).of(0.33)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context '#threshold' do
|
76
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value' do
|
77
|
+
requested_pvalue = 6.0/16
|
78
|
+
threshold = pwm_second.threshold(requested_pvalue)
|
79
|
+
pwm_second.pvalue_by_threshold(threshold).should <= requested_pvalue
|
80
|
+
end
|
81
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value when calculated on background' do
|
82
|
+
requested_pvalue = 0.33
|
83
|
+
threshold = pwm_second_on_background.threshold(requested_pvalue)
|
84
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
|
85
|
+
end
|
86
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value when actual pvalue isn\'t exact equal to requested' do
|
87
|
+
requested_pvalue = 0.335
|
88
|
+
threshold = pwm_second_on_background.threshold(requested_pvalue)
|
89
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
|
90
|
+
end
|
91
|
+
end
|
92
|
+
context '#weak_threshold' do
|
93
|
+
it 'should return threshold such that according pvalue exceed requested value' do
|
94
|
+
requested_pvalue = 6.0/16
|
95
|
+
threshold = pwm_second.weak_threshold(requested_pvalue)
|
96
|
+
pwm_second.pvalue_by_threshold(threshold).should >= requested_pvalue
|
97
|
+
end
|
98
|
+
it 'should return threshold such that according pvalue exceed requested value when calculated on background' do
|
99
|
+
requested_pvalue = 0.33
|
100
|
+
threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
|
101
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
|
102
|
+
end
|
103
|
+
it 'should return threshold such that according pvalue exceed requested value when actual pvalue isn\'t exact equal to requested' do
|
104
|
+
requested_pvalue = 0.335
|
105
|
+
threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
|
106
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
|
2
2
|
$LOAD_PATH.unshift $bioinform_folder
|
3
3
|
|
4
|
-
require 'rspec'
|
4
|
+
require 'rspec'
|
5
|
+
|
6
|
+
# comparing hashes with float values
|
7
|
+
RSpec::Matchers.define :have_nearly_the_same_values do |expected, vicinity|
|
8
|
+
match do |actual|
|
9
|
+
expected.all?{|key, _| actual.has_key?(key)} && actual.all?{|key, _| expected.has_key?(key)} && expected.all?{|key, value| (actual[key] - value).abs <= vicinity }
|
10
|
+
end
|
11
|
+
end
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.
|
4
|
+
version: 4.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bioinform
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.1.10
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.1.10
|
27
27
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
@@ -42,7 +42,7 @@ executables:
|
|
42
42
|
extensions: []
|
43
43
|
extra_rdoc_files: []
|
44
44
|
files:
|
45
|
-
- .gitignore
|
45
|
+
- ".gitignore"
|
46
46
|
- Gemfile
|
47
47
|
- LICENSE
|
48
48
|
- README.md
|
@@ -118,17 +118,17 @@ require_paths:
|
|
118
118
|
- lib
|
119
119
|
required_ruby_version: !ruby/object:Gem::Requirement
|
120
120
|
requirements:
|
121
|
-
- -
|
121
|
+
- - ">="
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
125
|
requirements:
|
126
|
-
- -
|
126
|
+
- - ">="
|
127
127
|
- !ruby/object:Gem::Version
|
128
128
|
version: '0'
|
129
129
|
requirements: []
|
130
130
|
rubyforge_project:
|
131
|
-
rubygems_version: 2.
|
131
|
+
rubygems_version: 2.1.5
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: PWM comparison tool using MACROAPE approach
|
@@ -170,4 +170,3 @@ test_files:
|
|
170
170
|
- test/preprocess_collection_test.rb
|
171
171
|
- test/scan_collection_test.rb
|
172
172
|
- test/test_helper.rb
|
173
|
-
has_rdoc:
|