macroape 4.0.1 → 4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4499602694103f438e53430766a1b5bad85baf61
4
- data.tar.gz: e95adc086488900be4e0f0654da3543a8ea6a8c2
3
+ metadata.gz: fe425b463f7ed42c64027a15d68ee85a38334058
4
+ data.tar.gz: f9789653cf1708f00eb9255a24c4c009c352f3a6
5
5
  SHA512:
6
- metadata.gz: 42fa7ba9a07625432176831623549395695aa936f19efcfd62e9282f1dc6bb5b93359103b7e84de6d3fcda4686dc5f8f1264756a050e0b1f2fb433c4b96809bb
7
- data.tar.gz: d4e94f5384e9e1cf4d34d777349b2d4e64e780f7b8ad9a5c71228c4b83b0b791165499c7bdc8c6c84a0a8d384bd617e9c312e68cb27e32713cd0be79a34cf879
6
+ metadata.gz: a09caa08fe447ef672ad91793151f0d1a2fef245141e86f5f54e467d881e32863a0e76502e7345297d702c9115d1b721e365a1c6e314e5cd5b55920e0f04a585
7
+ data.tar.gz: fcdf7a5724d1a8ad1d589a5d1d9ec892d780df3d05234af09e6ee546a486a95f4c833317ec2392fae65c55dd406b30d7e71e0eba9c09833edd86c4d489817679
data/TODO.txt CHANGED
@@ -1,3 +1,10 @@
1
+ Why does Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on Java work only for P-values less than or equal to 0.5, while 0.55 or 0.8 breaks the program (it never terminates)? My hypothesis is that the Gaussian threshold estimation fails.
2
+
3
+
4
+ Add a --same-strand mode that disallows the reverse-complement (revcomp) transformation of a motif, so that it becomes possible to compare RNA-PWMs.
5
+
6
+ Fix align_motifs so that option keys can follow the positional arguments (in other words, use OptionParser).
7
+
1
8
  ToDo:
2
9
  6)
3
10
  # TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
@@ -77,11 +77,11 @@ module Macroape
77
77
 
78
78
  shifts = []
79
79
  shifts << [leader_pwm_file, 0, :direct]
80
- pwm_first = data_model.new(File.read(leader_pwm_file)).to_pwm
80
+ pwm_first = data_model.new(File.read(leader_pwm_file)).set_parameters(background: leader_background).to_pwm
81
81
  pwm_first.set_parameters(background: leader_background, max_hash_size: max_hash_size).discrete!(discretization)
82
82
 
83
83
  rest_pwm_files.each do |motif_name|
84
- pwm_second = data_model.new(File.read(motif_name)).to_pwm
84
+ pwm_second = data_model.new(File.read(motif_name)).set_parameters(background: rest_motifs_background).to_pwm
85
85
  pwm_second.set_parameters(background: rest_motifs_background, max_hash_size: max_hash_size).discrete!(discretization)
86
86
  cmp = Macroape::PWMCompare.new(pwm_first, pwm_second).set_parameters(max_pair_hash_size: max_pair_hash_size)
87
87
  info = cmp.jaccard_by_pvalue(pvalue)
@@ -101,7 +101,7 @@ module Macroape
101
101
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
102
102
  input_first = File.read(first_file)
103
103
  end
104
- pwm_first = data_model.new(input_first).to_pwm
104
+ pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
105
105
 
106
106
  if second_file == '.stdin'
107
107
  input_second = parser.parse
@@ -109,7 +109,7 @@ module Macroape
109
109
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
110
110
  input_second = File.read(second_file)
111
111
  end
112
- pwm_second = data_model.new(input_second).to_pwm
112
+ pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
113
113
 
114
114
  pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
115
115
  pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
@@ -81,7 +81,7 @@ module Macroape
81
81
  raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
82
82
  input_first = File.read(first_file)
83
83
  end
84
- pwm_first = data_model.new(input_first).to_pwm
84
+ pwm_first = data_model.new(input_first).set_parameters(background: first_background).to_pwm
85
85
 
86
86
  if second_file == '.stdin'
87
87
  input_second = parser.parse
@@ -89,7 +89,7 @@ module Macroape
89
89
  raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
90
90
  input_second = File.read(second_file)
91
91
  end
92
- pwm_second = data_model.new(input_second).to_pwm
92
+ pwm_second = data_model.new(input_second).set_parameters(background: second_background).to_pwm
93
93
 
94
94
  pwm_first.set_parameters(background: first_background, max_hash_size: max_hash_size).discrete!(discretization)
95
95
  pwm_second.set_parameters(background: second_background, max_hash_size: max_hash_size).discrete!(discretization)
@@ -62,7 +62,7 @@ module Macroape
62
62
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
63
63
  input = File.read(filename)
64
64
  end
65
- pwm = data_model.new(input).to_pwm
65
+ pwm = data_model.new(input).set_parameters(background: background).to_pwm
66
66
  pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
67
67
 
68
68
  counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
@@ -68,7 +68,7 @@ module Macroape
68
68
  raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
69
69
  input = File.read(filename)
70
70
  end
71
- pwm = data_model.new(input).to_pwm
71
+ pwm = data_model.new(input).set_parameters(background: background).to_pwm
72
72
  pwm.set_parameters(background: background, max_hash_size: max_hash_size).discrete!(discretization)
73
73
 
74
74
  infos = []
@@ -90,6 +90,7 @@ module Macroape
90
90
  filelist.each do |filename|
91
91
  motif = data_model.new(File.read(filename))
92
92
  motif.name ||= File.basename(filename, File.extname(filename))
93
+ motif.set_parameters(background: background)
93
94
  motifs << motif
94
95
  end
95
96
  else
@@ -94,7 +94,7 @@ module Macroape
94
94
  query_input = File.read(filename)
95
95
  end
96
96
 
97
- query_pwm = data_model.new(query_input).to_pwm
97
+ query_pwm = data_model.new(query_input).set_parameters(background: query_background).to_pwm
98
98
  query_pwm.set_parameters(background: query_background, max_hash_size: max_hash_size)
99
99
 
100
100
  query_pwm_rough = query_pwm.discrete(rough_discretization)
@@ -1,4 +1,4 @@
1
1
  module Macroape
2
- VERSION = "4.0.1"
2
+ VERSION = "4.0.2"
3
3
  STANDALONE = false
4
4
  end
@@ -6,6 +6,9 @@ describe Bioinform::PWM do
6
6
  let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
7
7
  let :pwm_first do Bioinform::PWM.new(matrix_first) end
8
8
  let :pwm_second do Bioinform::PWM.new(matrix_second) end
9
+ let :background do [0.1,0.4,0.4,0.1] end
10
+ let :pwm_first_on_background do pwm_first.tap{|pwm| pwm.set_parameters(background: background)} end
11
+ let :pwm_second_on_background do pwm_second.tap{|pwm| pwm.set_parameters(background: background)} end
9
12
 
10
13
  context '#count_distribution_after_threshold' do
11
14
 
@@ -21,6 +24,19 @@ describe Bioinform::PWM do
21
24
  distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
22
25
  end
23
26
 
27
+ it 'for PWMs on different background it should contain the same scores (keys of hash)' do
28
+ pwm_first.count_distribution_after_threshold(0).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(0).keys.sort
29
+ pwm_first.count_distribution_after_threshold(13).keys.sort.should == pwm_first_on_background.count_distribution_after_threshold(13).keys.sort
30
+ end
31
+
32
+ it 'should return hash of score => count for all scores >= threshold when calculated on background' do
33
+ distribution_second = pwm_second_on_background.count_distribution_after_threshold(0)
34
+ distribution_second.should have_nearly_the_same_values({ 3=>0.01, 4=>0.08, 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
35
+
36
+ distribution_second = pwm_second_on_background.count_distribution_after_threshold(5)
37
+ distribution_second.should have_nearly_the_same_values({ 5=>0.24, 6=>0.34, 7=>0.24, 8=>0.08, 9=>0.01 }, 1e-7 )
38
+ end
39
+
24
40
  it 'should use existing precalculated hash @count_distribution if it exists' do
25
41
  pwm = pwm_second;
26
42
  pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
@@ -48,4 +64,46 @@ describe Bioinform::PWM do
48
64
  end
49
65
  end
50
66
 
51
- end
67
+ context '#pvalue_by_threshold' do
68
+ it 'should return probability to be >= than threshold' do
69
+ pwm_second.pvalue_by_threshold(7).should be_within(1e-7).of(6.0/16)
70
+ end
71
+ it 'should return probability to be >= than threshold when calculated on background' do
72
+ pwm_second_on_background.pvalue_by_threshold(7).should be_within(1e-7).of(0.33)
73
+ end
74
+ end
75
+ context '#threshold' do
76
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value' do
77
+ requested_pvalue = 6.0/16
78
+ threshold = pwm_second.threshold(requested_pvalue)
79
+ pwm_second.pvalue_by_threshold(threshold).should <= requested_pvalue
80
+ end
81
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value when calculated on background' do
82
+ requested_pvalue = 0.33
83
+ threshold = pwm_second_on_background.threshold(requested_pvalue)
84
+ pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
85
+ end
86
+ it 'should return threshold such that according pvalue doesn\'t exceed requested value when actual pvalue isn\'t exact equal to requested' do
87
+ requested_pvalue = 0.335
88
+ threshold = pwm_second_on_background.threshold(requested_pvalue)
89
+ pwm_second_on_background.pvalue_by_threshold(threshold).should <= requested_pvalue
90
+ end
91
+ end
92
+ context '#weak_threshold' do
93
+ it 'should return threshold such that according pvalue exceed requested value' do
94
+ requested_pvalue = 6.0/16
95
+ threshold = pwm_second.weak_threshold(requested_pvalue)
96
+ pwm_second.pvalue_by_threshold(threshold).should >= requested_pvalue
97
+ end
98
+ it 'should return threshold such that according pvalue exceed requested value when calculated on background' do
99
+ requested_pvalue = 0.33
100
+ threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
101
+ pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
102
+ end
103
+ it 'should return threshold such that according pvalue exceed requested value when actual pvalue isn\'t exact equal to requested' do
104
+ requested_pvalue = 0.335
105
+ threshold = pwm_second_on_background.weak_threshold(requested_pvalue)
106
+ pwm_second_on_background.pvalue_by_threshold(threshold).should >= requested_pvalue
107
+ end
108
+ end
109
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,11 @@
1
1
  $bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
2
2
  $LOAD_PATH.unshift $bioinform_folder
3
3
 
4
- require 'rspec'
4
+ require 'rspec'
5
+
6
+ # comparing hashes with float values
7
+ RSpec::Matchers.define :have_nearly_the_same_values do |expected, vicinity|
8
+ match do |actual|
9
+ expected.all?{|key, _| actual.has_key?(key)} && actual.all?{|key, _| expected.has_key?(key)} && expected.all?{|key, value| (actual[key] - value).abs <= vicinity }
10
+ end
11
+ end
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macroape
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.1
4
+ version: 4.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Vorontsov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-01 00:00:00.000000000 Z
11
+ date: 2013-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bioinform
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.1.10
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.1.10
27
27
  description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
@@ -42,7 +42,7 @@ executables:
42
42
  extensions: []
43
43
  extra_rdoc_files: []
44
44
  files:
45
- - .gitignore
45
+ - ".gitignore"
46
46
  - Gemfile
47
47
  - LICENSE
48
48
  - README.md
@@ -118,17 +118,17 @@ require_paths:
118
118
  - lib
119
119
  required_ruby_version: !ruby/object:Gem::Requirement
120
120
  requirements:
121
- - - '>='
121
+ - - ">="
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0'
124
124
  required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  requirements:
126
- - - '>='
126
+ - - ">="
127
127
  - !ruby/object:Gem::Version
128
128
  version: '0'
129
129
  requirements: []
130
130
  rubyforge_project:
131
- rubygems_version: 2.0.3
131
+ rubygems_version: 2.1.5
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: PWM comparison tool using MACROAPE approach
@@ -170,4 +170,3 @@ test_files:
170
170
  - test/preprocess_collection_test.rb
171
171
  - test/scan_collection_test.rb
172
172
  - test/test_helper.rb
173
- has_rdoc: