macroape 4.0.2 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c48f7766729e453bb4da35c7fa4c9792cc481f69
|
4
|
+
data.tar.gz: da9e81f90fa159d7f64f9fa49ca4d045fc53eef6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cae11debfcc446886bf04bfb8f8f30a4035becedf7720606a532ec46d4007f3437a574c7294835247030f7f2dd4f374a7396adc5709b4f0845612b6e1e7ae5a2
|
7
|
+
data.tar.gz: dc2b3811fa99aa6a6a06cb57fd7ded315db4cdece6c17729093aa03d21f6bbd248ed7e5234085e581a8131693e0589cfa2bc5a90fc5452875dad9fb95d89f298
|
data/.gitignore
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
*.gem
|
2
|
-
*.rbc
|
3
|
-
.bundle
|
4
|
-
.config
|
5
|
-
.yardoc
|
6
|
-
Gemfile.lock
|
7
|
-
InstalledFiles
|
8
|
-
_yardoc
|
9
|
-
coverage
|
10
|
-
doc/
|
11
|
-
lib/bundler/man
|
12
|
-
pkg
|
13
|
-
rdoc
|
14
|
-
spec/reports
|
15
|
-
test/tmp
|
16
|
-
test/version_tmp
|
17
|
-
tmp
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
18
|
benchmark/*.log
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in macroape.gemspec
|
4
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in macroape.gemspec
|
4
|
+
gemspec
|
data/LICENSE
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
Copyright (c) 2011-
|
2
|
-
|
3
|
-
MIT License
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
a copy of this software and associated documentation files (the
|
7
|
-
"Software"), to deal in the Software without restriction, including
|
8
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
the following conditions:
|
12
|
-
|
13
|
-
The above copyright notice and this permission notice shall be
|
14
|
-
included in all copies or substantial portions of the Software.
|
15
|
-
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
1
|
+
Copyright (c) 2011-2014 Ilya Vorontsov, Ivan Kulakovskiy, Vsevolod Makeev
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,70 +1,70 @@
|
|
1
|
-
# Macroape
|
2
|
-
|
3
|
-
Macroape is abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure between a pair of Position Weight Matrices. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'macroape'
|
10
|
-
|
11
|
-
And then execute:
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
16
|
-
|
17
|
-
$ gem install macroape
|
18
|
-
|
19
|
-
## Usage
|
20
|
-
For more information read manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
21
|
-
|
22
|
-
## Basic usage as a command-line tool
|
23
|
-
MacroAPE have 7 command line tools:
|
24
|
-
|
25
|
-
### Tools for calculating thresholds and pvalues:
|
26
|
-
* find_threshold \<PWM file\> [\<pvalue(by default: 0.0005)\>...]
|
27
|
-
* find_pvalue \<PWM file\> \<threshold\>...
|
28
|
-
|
29
|
-
### Tools for evaluating Jaccard similarity measure in the best alignment and in certain alignment:
|
30
|
-
* eval_similarity \<first PWM file\> \<second PWM file\>
|
31
|
-
* eval_alignment \<first PWM file\> \<second PWM file\> \<shift of second matrix\> \<orientation of second matrix(direct|revcomp)\>
|
32
|
-
|
33
|
-
### Tools for looking through collection for the motifs most similar to a query motif
|
34
|
-
* preprocess_collection \<folder with motif files\> \<collection output file\>
|
35
|
-
* scan_collection \<query PWM file\> \<collection file\>
|
36
|
-
|
37
|
-
### Tool for finding mutual alignment of several motifs relative to first(leader) motif. It's designed to use with sequence_logo to draw logos of clusters
|
38
|
-
* align_motifs \<pwm_leader\> \<pwm_2\> \<pwm_3\> ...
|
39
|
-
|
40
|
-
Also you can use -h option to print help for a tool in console.
|
41
|
-
There are lots of different command line options. Most useful option is -d <discretization=1|10|100|1000>. You can vary precision/speed rate by specifing a discretization. For more information look through a manual.
|
42
|
-
Some of tools also can process PCMs in addition to PWMs.
|
43
|
-
|
44
|
-
## Basic usage in your code
|
45
|
-
require 'macroape'
|
46
|
-
background = [1,1,1,1]
|
47
|
-
discretization = 10
|
48
|
-
pwm_first = Bioinform::PWM.new(File.read('first_pwm.pat')).background(background).discrete(discretization)
|
49
|
-
pwm_second = Bioinform::PWM.new(File.read('first_pwm.pat')).background(background).discrete(discretization)
|
50
|
-
|
51
|
-
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
|
52
|
-
first_threshold = pwm_first.threshold(pvalue)
|
53
|
-
second_threshold = pwm_second.threshold(pvalue)
|
54
|
-
|
55
|
-
similarity_info = cmp.jaccard(first_threshold, second_threshold)
|
56
|
-
puts "Jaccard similarity: #{similarity_info[:similarity]}"
|
57
|
-
|
58
|
-
For more details look a source code of utilities in lib/macroape/cli/ folder
|
59
|
-
|
60
|
-
## Contributing
|
61
|
-
|
62
|
-
1. Fork it
|
63
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
64
|
-
3. Commit your changes (`git commit -am 'Added some feature'`)
|
65
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
66
|
-
5. Create new Pull Request
|
67
|
-
|
68
|
-
If you're developing both macroape and bioinform - it may be useful to know that test_helper and spec_helper expands require path in such a way that if you have two "cousin" folders: macroape and bioinform then macroape specs will require bioinform from development folder not from gem. It can save you lots of time not to rebuild-reinstall bioinform gem each time it get some changes
|
69
|
-
|
70
|
-
Copyright (c) 2011-
|
1
|
+
# Macroape
|
2
|
+
|
3
|
+
Macroape is abbreviation for MAtrix CompaRisOn by Approximate P-value Estimation. It's a bioinformatic tool for evaluating similarity measure between a pair of Position Weight Matrices. Used approach and application described in manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'macroape'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install macroape
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
For more information read manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw
|
21
|
+
|
22
|
+
## Basic usage as a command-line tool
|
23
|
+
MacroAPE have 7 command line tools:
|
24
|
+
|
25
|
+
### Tools for calculating thresholds and pvalues:
|
26
|
+
* find_threshold \<PWM file\> [\<pvalue(by default: 0.0005)\>...]
|
27
|
+
* find_pvalue \<PWM file\> \<threshold\>...
|
28
|
+
|
29
|
+
### Tools for evaluating Jaccard similarity measure in the best alignment and in certain alignment:
|
30
|
+
* eval_similarity \<first PWM file\> \<second PWM file\>
|
31
|
+
* eval_alignment \<first PWM file\> \<second PWM file\> \<shift of second matrix\> \<orientation of second matrix(direct|revcomp)\>
|
32
|
+
|
33
|
+
### Tools for looking through collection for the motifs most similar to a query motif
|
34
|
+
* preprocess_collection \<folder with motif files\> \<collection output file\>
|
35
|
+
* scan_collection \<query PWM file\> \<collection file\>
|
36
|
+
|
37
|
+
### Tool for finding mutual alignment of several motifs relative to first(leader) motif. It's designed to use with sequence_logo to draw logos of clusters
|
38
|
+
* align_motifs \<pwm_leader\> \<pwm_2\> \<pwm_3\> ...
|
39
|
+
|
40
|
+
Also you can use -h option to print help for a tool in console.
|
41
|
+
There are lots of different command line options. Most useful option is -d <discretization=1|10|100|1000>. You can vary precision/speed rate by specifing a discretization. For more information look through a manual.
|
42
|
+
Some of tools also can process PCMs in addition to PWMs.
|
43
|
+
|
44
|
+
## Basic usage in your code
|
45
|
+
require 'macroape'
|
46
|
+
background = [1,1,1,1]
|
47
|
+
discretization = 10
|
48
|
+
pwm_first = Bioinform::PWM.new(File.read('first_pwm.pat')).background(background).discrete(discretization)
|
49
|
+
pwm_second = Bioinform::PWM.new(File.read('first_pwm.pat')).background(background).discrete(discretization)
|
50
|
+
|
51
|
+
cmp = Macroape::PWMCompare.new(pwm_first, pwm_second)
|
52
|
+
first_threshold = pwm_first.threshold(pvalue)
|
53
|
+
second_threshold = pwm_second.threshold(pvalue)
|
54
|
+
|
55
|
+
similarity_info = cmp.jaccard(first_threshold, second_threshold)
|
56
|
+
puts "Jaccard similarity: #{similarity_info[:similarity]}"
|
57
|
+
|
58
|
+
For more details look a source code of utilities in lib/macroape/cli/ folder
|
59
|
+
|
60
|
+
## Contributing
|
61
|
+
|
62
|
+
1. Fork it
|
63
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
64
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
65
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
66
|
+
5. Create new Pull Request
|
67
|
+
|
68
|
+
If you're developing both macroape and bioinform - it may be useful to know that test_helper and spec_helper expands require path in such a way that if you have two "cousin" folders: macroape and bioinform then macroape specs will require bioinform from development folder not from gem. It can save you lots of time not to rebuild-reinstall bioinform gem each time it get some changes
|
69
|
+
|
70
|
+
Copyright (c) 2011-2014 Ilya Vorontsov, Ivan Kulakovskiy, Vsevolod Makeev
|
data/Rakefile.rb
CHANGED
@@ -1,50 +1,50 @@
|
|
1
|
-
#!/usr/bin/env rake
|
2
|
-
require "bundler/gem_tasks"
|
3
|
-
require 'rspec/core/rake_task'
|
4
|
-
require 'rake/testtask'
|
5
|
-
|
6
|
-
namespace :spec do
|
7
|
-
Rake::TestTask.new do |t|
|
8
|
-
t.libs << "test"
|
9
|
-
t.test_files = FileList['test/*_test.rb']
|
10
|
-
t.verbose = true
|
11
|
-
end
|
12
|
-
RSpec::Core::RakeTask.new
|
13
|
-
end
|
14
|
-
|
15
|
-
desc 'Test all functionality of gem executables'
|
16
|
-
task :spec => ['spec:test', 'spec:spec']
|
17
|
-
|
18
|
-
namespace :benchmark do
|
19
|
-
task :run do
|
20
|
-
require 'open3'
|
21
|
-
time = Time.now.strftime("%d-%m-%Y, %H:%M:%S sec")
|
22
|
-
File.open('benchmark/benchmark.log','a') do |f|
|
23
|
-
f.puts "=========================================================\n#{time}\n"
|
24
|
-
Dir.glob('benchmark/*_benchmark.rb') do |benchmark_filename|
|
25
|
-
Open3.popen3("ruby -I ./benchmark #{benchmark_filename}") do |inp, out, err, wait_thr|
|
26
|
-
benchmark_name = File.basename(benchmark_filename)
|
27
|
-
out_str = out.read
|
28
|
-
err_str = err.read
|
29
|
-
|
30
|
-
benchmark_infos = "-------------------\n#{benchmark_name}:\n#{out_str}\n"
|
31
|
-
benchmark_infos_to_file = benchmark_infos
|
32
|
-
puts benchmark_infos
|
33
|
-
|
34
|
-
if err_str && !err_str.empty?
|
35
|
-
STDERR.puts(err_str)
|
36
|
-
benchmark_infos_to_file = benchmark_infos + "\n!!!\nError:\n#{err_str}\n"
|
37
|
-
end
|
38
|
-
|
39
|
-
# add info about git commit (if everything is commited, otherwise to commit one should use special option -c)
|
40
|
-
f.puts benchmark_infos_to_file
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
task :show do
|
46
|
-
puts File.read('benchmark/benchmark.log')
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
namespace :spec do
|
7
|
+
Rake::TestTask.new do |t|
|
8
|
+
t.libs << "test"
|
9
|
+
t.test_files = FileList['test/*_test.rb']
|
10
|
+
t.verbose = true
|
11
|
+
end
|
12
|
+
RSpec::Core::RakeTask.new
|
13
|
+
end
|
14
|
+
|
15
|
+
desc 'Test all functionality of gem executables'
|
16
|
+
task :spec => ['spec:test', 'spec:spec']
|
17
|
+
|
18
|
+
namespace :benchmark do
|
19
|
+
task :run do
|
20
|
+
require 'open3'
|
21
|
+
time = Time.now.strftime("%d-%m-%Y, %H:%M:%S sec")
|
22
|
+
File.open('benchmark/benchmark.log','a') do |f|
|
23
|
+
f.puts "=========================================================\n#{time}\n"
|
24
|
+
Dir.glob('benchmark/*_benchmark.rb') do |benchmark_filename|
|
25
|
+
Open3.popen3("ruby -I ./benchmark #{benchmark_filename}") do |inp, out, err, wait_thr|
|
26
|
+
benchmark_name = File.basename(benchmark_filename)
|
27
|
+
out_str = out.read
|
28
|
+
err_str = err.read
|
29
|
+
|
30
|
+
benchmark_infos = "-------------------\n#{benchmark_name}:\n#{out_str}\n"
|
31
|
+
benchmark_infos_to_file = benchmark_infos
|
32
|
+
puts benchmark_infos
|
33
|
+
|
34
|
+
if err_str && !err_str.empty?
|
35
|
+
STDERR.puts(err_str)
|
36
|
+
benchmark_infos_to_file = benchmark_infos + "\n!!!\nError:\n#{err_str}\n"
|
37
|
+
end
|
38
|
+
|
39
|
+
# add info about git commit (if everything is commited, otherwise to commit one should use special option -c)
|
40
|
+
f.puts benchmark_infos_to_file
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
task :show do
|
46
|
+
puts File.read('benchmark/benchmark.log')
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
50
|
task :benchmark => 'benchmark:run'
|
data/TODO.txt
CHANGED
@@ -1,47 +1,47 @@
|
|
1
|
-
Why Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on java works only for pvalues less or equal than 0.5, but 0.55 or 0.8 breaks the program (it doesn't stop). My hypothesis is that gauss threshold estimation fails
|
2
|
-
|
3
|
-
|
4
|
-
Make --same-strand mode which disallows revcomp transformation of motif - so it'll be possible to compare RNA-PWMs.
|
5
|
-
|
6
|
-
Fix align_motifs so that keys can go after arguments (use OptionParser in other words)
|
7
|
-
|
8
|
-
ToDo:
|
9
|
-
6)
|
10
|
-
# TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
|
11
|
-
# A workaround exists: we can use fractions, i.e. ('-19.0418'.to_r * 10000).to_f = -190418.0 but it obscures code and being used uncarefully can involve huge slowdown.
|
12
|
-
# I think, it'd be used only at input to workaround discretization issue
|
13
|
-
#
|
14
|
-
# def test_process_large_pvalue_floating_point_error
|
15
|
-
# pvalue, threshold, real_pvalue = nil, nil, nil
|
16
|
-
# assert_nothing_raised {
|
17
|
-
# pvalue, threshold, real_pvalue = Helpers.find_threshold_output('KLF4_f2.pwm -p 0.8').strip.split("\t")
|
18
|
-
# }
|
19
|
-
# assert_equal '0.8', pvalue
|
20
|
-
# assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm #{threshold}"), real_pvalue
|
21
|
-
# end
|
22
|
-
7)thresholds and thresholds_weak should return a collection (Array or Hash) when block not given
|
23
|
-
merge this two methods into one parametrized method
|
24
|
-
8)(TODO: for theoretically consistency, while making small inconsistences to old calculations)
|
25
|
-
When we work with strong threshold, we round matrix up(in order to overrate threshold comparing to real thus taking underrated pvalue) and take upper bound of discrete-thresholds fork.
|
26
|
-
When we are estimating lower bound of threshold (weak threshold) we take lower bound of fork of discrete thresholds. But we should ALSO (not done yet) take matrix discreted down! This'd allow us give exact answer on a question in which range real threshold should lay with given P-value, now we correctly estimate only lower bound of threshold(upper bound of P-value)
|
27
|
-
9) (may be) Option to specify predefined query motif threshold in scan_collection
|
28
|
-
10) Fix Readme!
|
29
|
-
|
30
|
-
Specs and tests:
|
31
|
-
create spec on use of MaxHashSize, MaxHashSizeDouble
|
32
|
-
create spec for testing case when {real_pvalue == 0, threshold == best_score + 1}
|
33
|
-
create test for nonuniform word-wise background([1,1,1,1]) and for different backgrounds
|
34
|
-
|
35
|
-
Ideas to increase perfomance:
|
36
|
-
- Add shifting matrix elements to zero after discreeting - in such case worst suffix is zero at all positions (??! it can significantly obscure code because thresholds will be changed too, and I can't tell what is better: slight perfomance optimization or conciseness of code)
|
37
|
-
- (?) Make rearrangment of rows by DIC decreasing in aligned pair of matrices before counting
|
38
|
-
- Create JAVA extension for alignment_intersection methods in order to increase perfomance
|
39
|
-
- Possibly algorithm shouldn't use hash but had two iterations: at first it determines possible hash scores for every length(if worst suffix is always zero, its flat space of scores at all pwm prefix lengths) of each pwm separately. And after that we can work with arrays which use such scores as indices via additional substructure
|
40
|
-
|
41
|
-
Usability issues:
|
42
|
-
make preprocess_collection be able to add information to existing collection of motifs. Make able to give collection a name from command line
|
43
|
-
|
44
|
-
remove .stdin placeholder. Use tty? method instead
|
45
|
-
|
46
|
-
use OptionParser or docopt
|
1
|
+
Why Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') on java works only for pvalues less or equal than 0.5, but 0.55 or 0.8 breaks the program (it doesn't stop). My hypothesis is that gauss threshold estimation fails
|
2
|
+
|
3
|
+
|
4
|
+
Make --same-strand mode which disallows revcomp transformation of motif - so it'll be possible to compare RNA-PWMs.
|
5
|
+
|
6
|
+
Fix align_motifs so that keys can go after arguments (use OptionParser in other words)
|
7
|
+
|
8
|
+
ToDo:
|
9
|
+
6)
|
10
|
+
# TODO: FIX: this test fails due to floating point precision error: estimated threshold is -19.0418 but '-19.0418'.to_f * 10000 = -190417.99999999997
|
11
|
+
# A workaround exists: we can use fractions, i.e. ('-19.0418'.to_r * 10000).to_f = -190418.0 but it obscures code and being used uncarefully can involve huge slowdown.
|
12
|
+
# I think, it'd be used only at input to workaround discretization issue
|
13
|
+
#
|
14
|
+
# def test_process_large_pvalue_floating_point_error
|
15
|
+
# pvalue, threshold, real_pvalue = nil, nil, nil
|
16
|
+
# assert_nothing_raised {
|
17
|
+
# pvalue, threshold, real_pvalue = Helpers.find_threshold_output('KLF4_f2.pwm -p 0.8').strip.split("\t")
|
18
|
+
# }
|
19
|
+
# assert_equal '0.8', pvalue
|
20
|
+
# assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm #{threshold}"), real_pvalue
|
21
|
+
# end
|
22
|
+
7)thresholds and thresholds_weak should return a collection (Array or Hash) when block not given
|
23
|
+
merge this two methods into one parametrized method
|
24
|
+
8)(TODO: for theoretically consistency, while making small inconsistences to old calculations)
|
25
|
+
When we work with strong threshold, we round matrix up(in order to overrate threshold comparing to real thus taking underrated pvalue) and take upper bound of discrete-thresholds fork.
|
26
|
+
When we are estimating lower bound of threshold (weak threshold) we take lower bound of fork of discrete thresholds. But we should ALSO (not done yet) take matrix discreted down! This'd allow us give exact answer on a question in which range real threshold should lay with given P-value, now we correctly estimate only lower bound of threshold(upper bound of P-value)
|
27
|
+
9) (may be) Option to specify predefined query motif threshold in scan_collection
|
28
|
+
10) Fix Readme!
|
29
|
+
|
30
|
+
Specs and tests:
|
31
|
+
create spec on use of MaxHashSize, MaxHashSizeDouble
|
32
|
+
create spec for testing case when {real_pvalue == 0, threshold == best_score + 1}
|
33
|
+
create test for nonuniform word-wise background([1,1,1,1]) and for different backgrounds
|
34
|
+
|
35
|
+
Ideas to increase perfomance:
|
36
|
+
- Add shifting matrix elements to zero after discreeting - in such case worst suffix is zero at all positions (??! it can significantly obscure code because thresholds will be changed too, and I can't tell what is better: slight perfomance optimization or conciseness of code)
|
37
|
+
- (?) Make rearrangment of rows by DIC decreasing in aligned pair of matrices before counting
|
38
|
+
- Create JAVA extension for alignment_intersection methods in order to increase perfomance
|
39
|
+
- Possibly algorithm shouldn't use hash but had two iterations: at first it determines possible hash scores for every length(if worst suffix is always zero, its flat space of scores at all pwm prefix lengths) of each pwm separately. And after that we can work with arrays which use such scores as indices via additional substructure
|
40
|
+
|
41
|
+
Usability issues:
|
42
|
+
make preprocess_collection be able to add information to existing collection of motifs. Make able to give collection a name from command line
|
43
|
+
|
44
|
+
remove .stdin placeholder. Use tty? method instead
|
45
|
+
|
46
|
+
use OptionParser or docopt
|
47
47
|
make options more uniform so that some of them were reusable(and the question: can I apply two option parsers consequently?)z
|
@@ -1,5 +1,5 @@
|
|
1
|
-
$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
|
2
|
-
$LOAD_PATH.unshift $bioinform_folder
|
3
|
-
|
4
|
-
require 'benchmark'
|
1
|
+
$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
|
2
|
+
$LOAD_PATH.unshift $bioinform_folder
|
3
|
+
|
4
|
+
require 'benchmark'
|
5
5
|
require_relative '../lib/macroape'
|