macroape 4.0.1 → 4.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4499602694103f438e53430766a1b5bad85baf61
4
- data.tar.gz: e95adc086488900be4e0f0654da3543a8ea6a8c2
3
+ metadata.gz: fe425b463f7ed42c64027a15d68ee85a38334058
4
+ data.tar.gz: f9789653cf1708f00eb9255a24c4c009c352f3a6
5
5
  SHA512:
6
- metadata.gz: 42fa7ba9a07625432176831623549395695aa936f19efcfd62e9282f1dc6bb5b93359103b7e84de6d3fcda4686dc5f8f1264756a050e0b1f2fb433c4b96809bb
7
- data.tar.gz: d4e94f5384e9e1cf4d34d777349b2d4e64e780f7b8ad9a5c71228c4b83b0b791165499c7bdc8c6c84a0a8d384bd617e9c312e68cb27e32713cd0be79a34cf879
6
+ metadata.gz: a09caa08fe447ef672ad91793151f0d1a2fef245141e86f5f54e467d881e32863a0e76502e7345297d702c9115d1b721e365a1c6e314e5cd5b55920e0f04a585
7
+ data.tar.gz: fcdf7a5724d1a8ad1d589a5d1d9ec892d780df3d05234af09e6ee546a486a95f4c833317ec2392fae65c55dd406b30d7e71e0eba9c09833edd86c4d489817679
data/TODO.txt CHANGED
@@ -1,3 +1,10 @@
1
+ Why does Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on Java work only for p-values less than or equal to 0.5, while 0.55 or 0.8 breaks the program (it doesn't stop)? My hypothesis is that the Gauss threshold estimation fails.
2
+
3
+
4
+ Add a --same-strand mode that disallows the reverse-complement (revcomp) transformation of a motif, so that it will be possible to compare RNA-PWMs.
5
+
6
+ Fix align_motifs so that keys can go after arguments (in other words, use OptionParser).
7
+
1
8
  ToDo:
2
9
  6)
3
10
  # TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
@@ -77,11 +77,11 @@ module Macroape
77
77
 
78
78
  shifts = []
79
79
  shifts << [leader_pwm_file, 0, :direct]
80
- pwm_first = data_model.new(File.read(leader_pwm_file)).to_pwm
80
+ pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
81
81
  pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
82
82
 
83
83
  rest_pwm_files.each do |motif_name|
84
- pwm_second = data_model.new(File.read(motif_name)).to_pwm
84
+ pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
85
85
  pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
86
86
  cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
87
87
  info = cmp.jaccard_by_pvalue(pvalue)
@@ -101,7 +101,7 @@ module Macroape
101
101
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
102
102
  input_first = File.read(first_file)
103
103
  end
104
- pwm_first = data_model.new(input_first).to_pwm
104
+ pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
105
105
 
106
106
  if second_file == '.stdin'
107
107
  input_second = parser.parse
@@ -109,7 +109,7 @@ module Macroape
109
109
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
110
110
  input_second = File.read(second_file)
111
111
  end
112
- pwm_second = data_model.new(input_second).to_pwm
112
+ pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
113
113
 
114
114
  pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
115
115
  pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
@@ -81,7 +81,7 @@ module Macroape
81
81
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
82
82
  input_first = File.read(first_file)
83
83
  end
84
- pwm_first = data_model.new(input_first).to_pwm
84
+ pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
85
85
 
86
86
  if second_file == '.stdin'
87
87
  input_second = parser.parse
@@ -89,7 +89,7 @@ module Macroape
89
89
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
90
90
  input_second = File.read(second_file)
91
91
  end
92
- pwm_second = data_model.new(input_second).to_pwm
92
+ pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
93
93
 
94
94
  pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
95
95
  pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
@@ -62,7 +62,7 @@ module Macroape
62
62
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
63
63
  input = File.read(filename)
64
64
  end
65
- pwm = data_model.new(input).to_pwm
65
+ pwm = data_model.new(input).set_parameters(background: background).to_pwm
66
66
  pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
67
67
 
68
68
  counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
@@ -68,7 +68,7 @@ module Macroape
68
68
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
69
69
  input = File.read(filename)
70
70
  end
71
- pwm = data_model.new(input).to_pwm
71
+ pwm = data_model.new(input).set_parameters(background: background).to_pwm
72
72
  pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
73
73
 
74
74
  infos = []
@@ -90,6 +90,7 @@ module Macroape
90
90
  filelist.each do |filename|
91
91
  motif = data_model.new(File.read(filename))
92
92
  motif.name ||= File.basename(filename, File.extname(filename))
93
+ motif.set_parameters(background: background)
93
94
  motifs << motif
94
95
  end
95
96
  else
@@ -94,7 +94,7 @@ module Macroape
94
94
  query_input = File.read(filename)
95
95
  end
96
96
 
97
- query_pwm = data_model.new(query_input).to_pwm
97
+ query_pwm = data_model.new(query_input).set_parameters(background: query_background).to_pwm
98
98
  query_pwm.set_parameters(background: query_background, max_hash_size: max_hash_size)
99
99
 
100
100
  query_pwm_rough = query_pwm.discrete(rough_discretization)
@@ -1,4 +1,4 @@
1
1
  module Macroape
2
- VERSION = "4.0.1"
2
+ VERSION = "4.0.2"
3
3
  STANDALONE = false
4
4
  end
@@ -6,6 +6,9 @@ describe Bioinform::PWM do
6
6
  let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
7
7
  let :pwm_first do Bioinform::PWM.new(matrix_first) end
8
8
  let :pwm_second do Bioinform::PWM.new(matrix_second) end
9
+ let :background do [0.1,0.4,0.4,0.1] end
10
+ let :pwm_first_on_background do pwm_first.tap{|pwm| pwm.set_parameters(background: background)} end
11
+ let :pwm_second_on_background do pwm_second.tap{|pwm| pwm.set_parameters(background: background)} end
9
12
 
10
13
  context '#count_distribution_after_threshold' do
11
14
 
@@ -21,6 +24,19 @@ describe Bioinform::PWM do
21
24
  distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
22
25
  end
23
26
 
27
+ it 'for PWMs on different background it should contain the same scores (keys of hash)' do
28
+ pwm_first.count_distribution_after_threshold(0).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(0).keys.sort
29
+ pwm_first.count_distribution_after_threshold(13).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(13).keys.sort
30
+ end
31
+
32
+ it 'should return hash of score => count for all scores >= threshold when calculated on background' do
33
+ distribution_second = pwm_second_on_background.count_distribution_after_threshold(0)
34
+ distribution_second.should have_nearly_the_same_values({ 3=>0.01, 4=>0.08, 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
35
+
36
+ distribution_second = pwm_second_on_background.count_distribution_after_threshold(5)
37
+ distribution_second.should have_nearly_the_same_values({ 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
38
+ end
39
+
24
40
  it 'should use existing precalculated hash @count_distribution if it exists' do
25
41
  pwm = pwm_second;
26
42
  pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
@@ -48,4 +64,46 @@ describe Bioinform::PWM do
48
64
  end
49
65
  end
50
66
 
51
- end
67
+ context '#pvalue_by_threshold' do
68
+ it 'should return probability to be >= than threshold' do
69
+ pwm_second.pvalue_by_threshold(7).should be_within(1e-7).of(6.0/16)
70
+ end
71
+ it 'should return probability to be >= than threshold when calculated on background' do
72
+ pwm_second_on_background.pvalue_by_threshold(7).should be_within(1e-7).of(0.33)
73
+ end
74
+ end
75
+ context '#threshold' do
76
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value' do
77
+ requested_pvalue = 6.0/16
78
+ threshold = pwm_second.threshold(requested_pvalue)
79
+ pwm_second.pvalue_by_threshold(threshold).should <= requested_pvalue
80
+ end
81
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value when calculated on background' do
82
+ requested_pvalue = 0.33
83
+ threshold = pwm_second_on_background.threshold(requested_pvalue)
84
+ pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
85
+ end
86
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value when actual pvalue isn\'t exact equal to requested' do
87
+ requested_pvalue = 0.335
88
+ threshold = pwm_second_on_background.threshold(requested_pvalue)
89
+ pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
90
+ end
91
+ end
92
+ context '#weak_threshold' do
93
+ it 'should return threshold such that according pvalue exceed requested value' do
94
+ requested_pvalue = 6.0/16
95
+ threshold = pwm_second.weak_threshold(requested_pvalue)
96
+ pwm_second.pvalue_by_threshold(threshold).should >= requested_pvalue
97
+ end
98
+ it 'should return threshold such that according pvalue exceed requested value when calculated on background' do
99
+ requested_pvalue = 0.33
100
+ threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
101
+ pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
102
+ end
103
+ it 'should return threshold such that according pvalue exceed requested value when actual pvalue isn\'t exact equal to requested' do
104
+ requested_pvalue = 0.335
105
+ threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
106
+ pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
107
+ end
108
+ end
109
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,11 @@
1
1
  $bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
2
2
  $LOAD_PATH.unshift $bioinform_folder
3
3
 
4
- require 'rspec'
4
+ require 'rspec'
5
+
6
+ # comparing hashes with float values
7
+ RSpec::Matchers.define :have_nearly_the_same_values do |expected, vicinity|
8
+ match do |actual|
9
+ expected.all?{|key, _| actual.has_key?(key)} && actual.all?{|key, _| expected.has_key?(key)} && expected.all?{|key, value| (actual[key] - value).abs <= vicinity }
10
+ end
11
+ end
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.1
4
+ version: 4.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-01 00:00:00.000000000 Z
11
+ date: 2013-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bioinform
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.1.10
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.1.10
27
27
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
@@ -42,7 +42,7 @@ executables:
42
42
  extensions: []
43
43
  extra_rdoc_files: []
44
44
  files:
45
- - .gitignore
45
+ - ".gitignore"
46
46
  - Gemfile
47
47
  - LICENSE
48
48
  - README.md
@@ -118,17 +118,17 @@ require_paths:
118
118
  - lib
119
119
  required_ruby_version: !ruby/object:Gem::Requirement
120
120
  requirements:
121
- - - '>='
121
+ - - ">="
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0'
124
124
  required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  requirements:
126
- - - '>='
126
+ - - ">="
127
127
  - !ruby/object:Gem::Version
128
128
  version: '0'
129
129
  requirements: []
130
130
  rubyforge_project:
131
- rubygems_version: 2.0.3
131
+ rubygems_version: 2.1.5
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: PWM comparison tool using MACROAPE approach
@@ -170,4 +170,3 @@ test_files:
170
170
  - test/preprocess_collection_test.rb
171
171
  - test/scan_collection_test.rb
172
172
  - test/test_helper.rb
173
- has_rdoc: