macroape 4.0.1 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TODO.txt +7 -0
- data/lib/macroape/cli/align_motifs.rb +2 -2
- data/lib/macroape/cli/eval_alignment.rb +2 -2
- data/lib/macroape/cli/eval_similarity.rb +2 -2
- data/lib/macroape/cli/find_pvalue.rb +1 -1
- data/lib/macroape/cli/find_threshold.rb +1 -1
- data/lib/macroape/cli/preprocess_collection.rb +1 -0
- data/lib/macroape/cli/scan_collection.rb +1 -1
- data/lib/macroape/version.rb +1 -1
- data/spec/count_distribution_spec.rb +59 -1
- data/spec/spec_helper.rb +8 -1
- metadata +8 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe425b463f7ed42c64027a15d68ee85a38334058
|
4
|
+
data.tar.gz: f9789653cf1708f00eb9255a24c4c009c352f3a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a09caa08fe447ef672ad91793151f0d1a2fef245141e86f5f54e467d881e32863a0e76502e7345297d702c9115d1b721e365a1c6e314e5cd5b55920e0f04a585
|
7
|
+
data.tar.gz: fcdf7a5724d1a8ad1d589a5d1d9ec892d780df3d05234af09e6ee546a486a95f4c833317ec2392fae65c55dd406b30d7e71e0eba9c09833edd86c4d489817679
|
data/TODO.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
Why does Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on Java work only for P-values less than or equal to 0.5, while 0.55 or 0.8 breaks the program (it never terminates)? My hypothesis is that the Gaussian threshold estimation fails.
|
2
|
+
|
3
|
+
|
4
|
+
Add a --same-strand mode that disallows the reverse-complement transformation of the motif, so that it becomes possible to compare RNA PWMs.
|
5
|
+
|
6
|
+
Fix align_motifs so that options (keys) can follow the positional arguments (in other words, use OptionParser).
|
7
|
+
|
1
8
|
ToDo:
|
2
9
|
6)
|
3
10
|
# TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
|
data/lib/macroape/cli/align_motifs.rb
CHANGED
@@ -77,11 +77,11 @@ module Macroape
|
|
77
77
|
|
78
78
|
shifts = []
|
79
79
|
shifts << [leader_pwm_file, 0, :direct]
|
80
|
-
pwm_first = data_model.new(File.read(leader_pwm_file)).to_pwm
|
80
|
+
pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
|
81
81
|
pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
|
82
82
|
|
83
83
|
rest_pwm_files.each do |motif_name|
|
84
|
-
pwm_second = data_model.new(File.read(motif_name)).to_pwm
|
84
|
+
pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
|
85
85
|
pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
|
86
86
|
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
|
87
87
|
info = cmp.jaccard_by_pvalue(pvalue)
|
data/lib/macroape/cli/eval_alignment.rb
CHANGED
@@ -101,7 +101,7 @@ module Macroape
|
|
101
101
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
102
102
|
input_first = File.read(first_file)
|
103
103
|
end
|
104
|
-
pwm_first = data_model.new(input_first).to_pwm
|
104
|
+
pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
|
105
105
|
|
106
106
|
if second_file == '.stdin'
|
107
107
|
input_second = parser.parse
|
@@ -109,7 +109,7 @@ module Macroape
|
|
109
109
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
110
110
|
input_second = File.read(second_file)
|
111
111
|
end
|
112
|
-
pwm_second = data_model.new(input_second).to_pwm
|
112
|
+
pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
|
113
113
|
|
114
114
|
pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
|
115
115
|
pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
|
data/lib/macroape/cli/eval_similarity.rb
CHANGED
@@ -81,7 +81,7 @@ module Macroape
|
|
81
81
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
82
82
|
input_first = File.read(first_file)
|
83
83
|
end
|
84
|
-
pwm_first = data_model.new(input_first).to_pwm
|
84
|
+
pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
|
85
85
|
|
86
86
|
if second_file == '.stdin'
|
87
87
|
input_second = parser.parse
|
@@ -89,7 +89,7 @@ module Macroape
|
|
89
89
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
90
90
|
input_second = File.read(second_file)
|
91
91
|
end
|
92
|
-
pwm_second = data_model.new(input_second).to_pwm
|
92
|
+
pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
|
93
93
|
|
94
94
|
pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
|
95
95
|
pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
|
data/lib/macroape/cli/find_pvalue.rb
CHANGED
@@ -62,7 +62,7 @@ module Macroape
|
|
62
62
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
63
63
|
input = File.read(filename)
|
64
64
|
end
|
65
|
-
pwm = data_model.new(input).to_pwm
|
65
|
+
pwm = data_model.new(input).set_parameters(background: background).to_pwm
|
66
66
|
pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
|
67
67
|
|
68
68
|
counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
data/lib/macroape/cli/find_threshold.rb
CHANGED
@@ -68,7 +68,7 @@ module Macroape
|
|
68
68
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
69
69
|
input = File.read(filename)
|
70
70
|
end
|
71
|
-
pwm = data_model.new(input).to_pwm
|
71
|
+
pwm = data_model.new(input).set_parameters(background: background).to_pwm
|
72
72
|
pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
|
73
73
|
|
74
74
|
infos = []
|
data/lib/macroape/cli/scan_collection.rb
CHANGED
@@ -94,7 +94,7 @@ module Macroape
|
|
94
94
|
query_input = File.read(filename)
|
95
95
|
end
|
96
96
|
|
97
|
-
query_pwm = data_model.new(query_input).to_pwm
|
97
|
+
query_pwm = data_model.new(query_input).set_parameters(background: query_background).to_pwm
|
98
98
|
query_pwm.set_parameters(background: query_background, max_hash_size: max_hash_size)
|
99
99
|
|
100
100
|
query_pwm_rough = query_pwm.discrete(rough_discretization)
|
data/lib/macroape/version.rb
CHANGED
data/spec/count_distribution_spec.rb
CHANGED
@@ -6,6 +6,9 @@ describe Bioinform::PWM do
|
|
6
6
|
let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
|
7
7
|
let :pwm_first do Bioinform::PWM.new(matrix_first) end
|
8
8
|
let :pwm_second do Bioinform::PWM.new(matrix_second) end
|
9
|
+
let :background do [0.1,0.4,0.4,0.1] end
|
10
|
+
let :pwm_first_on_background do pwm_first.tap{|pwm| pwm.set_parameters(background: background)} end
|
11
|
+
let :pwm_second_on_background do pwm_second.tap{|pwm| pwm.set_parameters(background: background)} end
|
9
12
|
|
10
13
|
context '#count_distribution_after_threshold' do
|
11
14
|
|
@@ -21,6 +24,19 @@ describe Bioinform::PWM do
|
|
21
24
|
distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
22
25
|
end
|
23
26
|
|
27
|
+
it 'for PWMs on different background it should contain the same scores (keys of hash)' do
|
28
|
+
pwm_first.count_distribution_after_threshold(0).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(0).keys.sort
|
29
|
+
pwm_first.count_distribution_after_threshold(13).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(13).keys.sort
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should return hash of score => count for all scores >= threshold when calculated on background' do
|
33
|
+
distribution_second = pwm_second_on_background.count_distribution_after_threshold(0)
|
34
|
+
distribution_second.should have_nearly_the_same_values({ 3=>0.01, 4=>0.08, 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
|
35
|
+
|
36
|
+
distribution_second = pwm_second_on_background.count_distribution_after_threshold(5)
|
37
|
+
distribution_second.should have_nearly_the_same_values({ 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
|
38
|
+
end
|
39
|
+
|
24
40
|
it 'should use existing precalculated hash @count_distribution if it exists' do
|
25
41
|
pwm = pwm_second;
|
26
42
|
pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
@@ -48,4 +64,46 @@ describe Bioinform::PWM do
|
|
48
64
|
end
|
49
65
|
end
|
50
66
|
|
51
|
-
|
67
|
+
context '#pvalue_by_threshold' do
|
68
|
+
it 'should return probability to be >= than threshold' do
|
69
|
+
pwm_second.pvalue_by_threshold(7).should be_within(1e-7).of(6.0/16)
|
70
|
+
end
|
71
|
+
it 'should return probability to be >= than threshold when calculated on background' do
|
72
|
+
pwm_second_on_background.pvalue_by_threshold(7).should be_within(1e-7).of(0.33)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context '#threshold' do
|
76
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value' do
|
77
|
+
requested_pvalue = 6.0/16
|
78
|
+
threshold = pwm_second.threshold(requested_pvalue)
|
79
|
+
pwm_second.pvalue_by_threshold(threshold).should <= requested_pvalue
|
80
|
+
end
|
81
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value when calculated on background' do
|
82
|
+
requested_pvalue = 0.33
|
83
|
+
threshold = pwm_second_on_background.threshold(requested_pvalue)
|
84
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
|
85
|
+
end
|
86
|
+
it 'should return threshold such that according pvalue doesn\'t exceed requested value when actual pvalue isn\'t exact equal to requested' do
|
87
|
+
requested_pvalue = 0.335
|
88
|
+
threshold = pwm_second_on_background.threshold(requested_pvalue)
|
89
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
|
90
|
+
end
|
91
|
+
end
|
92
|
+
context '#weak_threshold' do
|
93
|
+
it 'should return threshold such that according pvalue exceed requested value' do
|
94
|
+
requested_pvalue = 6.0/16
|
95
|
+
threshold = pwm_second.weak_threshold(requested_pvalue)
|
96
|
+
pwm_second.pvalue_by_threshold(threshold).should >= requested_pvalue
|
97
|
+
end
|
98
|
+
it 'should return threshold such that according pvalue exceed requested value when calculated on background' do
|
99
|
+
requested_pvalue = 0.33
|
100
|
+
threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
|
101
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
|
102
|
+
end
|
103
|
+
it 'should return threshold such that according pvalue exceed requested value when actual pvalue isn\'t exact equal to requested' do
|
104
|
+
requested_pvalue = 0.335
|
105
|
+
threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
|
106
|
+
pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
|
2
2
|
$LOAD_PATH.unshift $bioinform_folder
|
3
3
|
|
4
|
-
require 'rspec'
|
4
|
+
require 'rspec'
|
5
|
+
|
6
|
+
# comparing hashes with float values
|
7
|
+
RSpec::Matchers.define :have_nearly_the_same_values do |expected, vicinity|
|
8
|
+
match do |actual|
|
9
|
+
expected.all?{|key, _| actual.has_key?(key)} && actual.all?{|key, _| expected.has_key?(key)} && expected.all?{|key, value| (actual[key] - value).abs <= vicinity }
|
10
|
+
end
|
11
|
+
end
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.1
|
4
|
+
version: 4.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bioinform
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.1.10
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.1.10
|
27
27
|
description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
|
@@ -42,7 +42,7 @@ executables:
|
|
42
42
|
extensions: []
|
43
43
|
extra_rdoc_files: []
|
44
44
|
files:
|
45
|
-
- .gitignore
|
45
|
+
- ".gitignore"
|
46
46
|
- Gemfile
|
47
47
|
- LICENSE
|
48
48
|
- README.md
|
@@ -118,17 +118,17 @@ require_paths:
|
|
118
118
|
- lib
|
119
119
|
required_ruby_version: !ruby/object:Gem::Requirement
|
120
120
|
requirements:
|
121
|
-
- -
|
121
|
+
- - ">="
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
125
|
requirements:
|
126
|
-
- -
|
126
|
+
- - ">="
|
127
127
|
- !ruby/object:Gem::Version
|
128
128
|
version: '0'
|
129
129
|
requirements: []
|
130
130
|
rubyforge_project:
|
131
|
-
rubygems_version: 2.
|
131
|
+
rubygems_version: 2.1.5
|
132
132
|
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: PWM comparison tool using MACROAPE approach
|
@@ -170,4 +170,3 @@ test_files:
|
|
170
170
|
- test/preprocess_collection_test.rb
|
171
171
|
- test/scan_collection_test.rb
|
172
172
|
- test/test_helper.rb
|
173
|
-
has_rdoc:
|