macroape 3.3.4 → 3.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -2
- data/TODO.txt +1 -4
- data/lib/macroape/cli/eval_alignment.rb +8 -5
- data/lib/macroape/cli/eval_similarity.rb +7 -5
- data/lib/macroape/cli/find_pvalue.rb +4 -2
- data/lib/macroape/cli/find_threshold.rb +4 -3
- data/lib/macroape/cli/preprocess_collection.rb +1 -1
- data/lib/macroape/cli/scan_collection.rb +4 -2
- data/lib/macroape/version.rb +1 -1
- data/test/eval_alignment_test.rb +4 -0
- data/test/eval_similarity_test.rb +3 -0
- data/test/find_pvalue_test.rb +3 -0
- data/test/find_threshold_test.rb +5 -0
- data/test/scan_collection_test.rb +4 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -20,7 +20,7 @@ Or install it yourself as:
|
|
20
20
|
For more information, read the manual at https://docs.google.com/document/pub?id=1_jsxhMNzMzy4d2d_byAd3n6Szg5gEcqG_Sf7w9tEqWw (not the latest version, but a comprehensive description of the approach)
|
21
21
|
|
22
22
|
## Basic usage as a command-line tool
|
23
|
-
MacroAPE have
|
23
|
+
MacroAPE has 7 command-line tools:
|
24
24
|
|
25
25
|
### Tools for calculating thresholds and pvalues:
|
26
26
|
* find_threshold \<PWM file\> [-p \<pvalue\> (by default: 0.0005)]
|
@@ -34,8 +34,12 @@ Or install it yourself as:
|
|
34
34
|
* preprocess_collection \<folder with motif files\> [-o \<collection output file\>]
|
35
35
|
* scan_collection \<query PWM file\> \<collection file\>
|
36
36
|
|
37
|
+
### Tool for finding the mutual alignment of several motifs relative to the first (leader) motif. It is designed to be used with sequence_logo to draw logos of clusters
|
38
|
+
* align_motifs \<pwm_leader\> \<pwm_2\> \<pwm_3\> ...
|
39
|
+
|
37
40
|
Also you can use -h option to print help for a tool in console.
|
38
41
|
There are lots of different command line options. The most useful option is -d <discretization=1|10|100|1000>. You can vary the precision/speed rate by specifying a discretization. For more information, look through the manual.
|
42
|
+
Some of the tools can also process PCMs in addition to PWMs.
|
39
43
|
|
40
44
|
## Basic usage in your code
|
41
45
|
require 'macroape'
|
@@ -51,7 +55,7 @@ Or install it yourself as:
|
|
51
55
|
similarity_info = cmp.jaccard(first_threshold, second_threshold)
|
52
56
|
puts "Jaccard similarity: #{similarity_info[:similarity]}"
|
53
57
|
|
54
|
-
For more details look a source code of utilities in lib/
|
58
|
+
For more details, look at the source code of the utilities in the lib/macroape/cli/ folder
|
55
59
|
|
56
60
|
## Contributing
|
57
61
|
|
data/TODO.txt
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
Absolutely necessary:
|
2
|
-
(already works in preprocess_collection and align_motifs) Make it possible to load PCM files (they should first be preprocessed to PWMs in a standardized way) -- maybe it's better to use a pipeline
|
3
|
-
|
4
1
|
Specs and tests:
|
5
2
|
create spec on use of MaxHashSize, MaxHashSizeDouble
|
6
3
|
create spec for testing case when {real_pvalue == 0, threshold == best_score + 1}
|
@@ -17,5 +14,5 @@ Usability issues:
|
|
17
14
|
|
18
15
|
remove .stdin placeholder. Use tty? method instead
|
19
16
|
|
20
|
-
use OptionParser
|
17
|
+
use OptionParser or docopt
|
21
18
|
make options more uniform so that some of them are reusable (and the question: can I apply two option parsers consecutively?)
|
@@ -44,7 +44,8 @@ module Macroape
|
|
44
44
|
second_background = [1,1,1,1]
|
45
45
|
max_hash_size = 1000000
|
46
46
|
max_pair_hash_size = 1000
|
47
|
-
|
47
|
+
|
48
|
+
data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
|
48
49
|
|
49
50
|
first_file = argv.shift
|
50
51
|
second_file = argv.shift
|
@@ -93,18 +94,20 @@ module Macroape
|
|
93
94
|
parser = Bioinform::StringParser.new($stdin.read) if first_file == '.stdin' || second_file == '.stdin'
|
94
95
|
|
95
96
|
if first_file == '.stdin'
|
96
|
-
|
97
|
+
input_first = parser.parse
|
97
98
|
else
|
98
99
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
99
|
-
|
100
|
+
input_first = File.read(first_file)
|
100
101
|
end
|
102
|
+
pwm_first = data_model.new(input_first).to_pwm
|
101
103
|
|
102
104
|
if second_file == '.stdin'
|
103
|
-
|
105
|
+
input_second = parser.parse
|
104
106
|
else
|
105
107
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
106
|
-
|
108
|
+
input_second = File.read(second_file)
|
107
109
|
end
|
110
|
+
pwm_second = data_model.new(input_second).to_pwm
|
108
111
|
|
109
112
|
pwm_first.background!(first_background).max_hash_size!(max_hash_size).discrete!(discretization)
|
110
113
|
pwm_second.background!(second_background).max_hash_size!(max_hash_size).discrete!(discretization)
|
@@ -47,7 +47,7 @@ module Macroape
|
|
47
47
|
max_hash_size = 1000000
|
48
48
|
max_pair_hash_size = 1000
|
49
49
|
|
50
|
-
|
50
|
+
data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
|
51
51
|
first_file = argv.shift
|
52
52
|
second_file = argv.shift
|
53
53
|
raise "You'd specify two input sources (each is filename or .stdin)" unless first_file and second_file
|
@@ -76,18 +76,20 @@ module Macroape
|
|
76
76
|
parser = Bioinform::StringParser.new($stdin.read) if first_file == '.stdin' || second_file == '.stdin'
|
77
77
|
|
78
78
|
if first_file == '.stdin'
|
79
|
-
|
79
|
+
input_first = parser.parse
|
80
80
|
else
|
81
81
|
raise "Error! File #{first_file} don't exist" unless File.exist?(first_file)
|
82
|
-
|
82
|
+
input_first = File.read(first_file)
|
83
83
|
end
|
84
|
+
pwm_first = data_model.new(input_first).to_pwm
|
84
85
|
|
85
86
|
if second_file == '.stdin'
|
86
|
-
|
87
|
+
input_second = parser.parse
|
87
88
|
else
|
88
89
|
raise "Error! File #{second_file} don't exist" unless File.exist?(second_file)
|
89
|
-
|
90
|
+
input_second = File.read(second_file)
|
90
91
|
end
|
92
|
+
pwm_second = data_model.new(input_second).to_pwm
|
91
93
|
|
92
94
|
pwm_first.background!(first_background).max_hash_size!(max_hash_size).discrete!(discretization)
|
93
95
|
pwm_second.background!(second_background).max_hash_size!(max_hash_size).discrete!(discretization)
|
@@ -41,6 +41,7 @@ module Macroape
|
|
41
41
|
thresholds = []
|
42
42
|
max_hash_size = 1000000
|
43
43
|
|
44
|
+
data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
|
44
45
|
filename = argv.shift
|
45
46
|
|
46
47
|
loop do
|
@@ -68,11 +69,12 @@ module Macroape
|
|
68
69
|
|
69
70
|
|
70
71
|
if filename == '.stdin'
|
71
|
-
|
72
|
+
input = $stdin.read
|
72
73
|
else
|
73
74
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
74
|
-
|
75
|
+
input = File.read(filename)
|
75
76
|
end
|
77
|
+
pwm = data_model.new(input).to_pwm
|
76
78
|
pwm.background!(background).max_hash_size!(max_hash_size).discrete!(discretization)
|
77
79
|
|
78
80
|
counts = pwm.counts_by_thresholds(* thresholds.map{|count| count * discretization})
|
@@ -36,6 +36,7 @@ module Macroape
|
|
36
36
|
default_pvalues = [0.0005]
|
37
37
|
discretization = 10000
|
38
38
|
max_hash_size = 1000000
|
39
|
+
data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
|
39
40
|
|
40
41
|
filename = argv.shift
|
41
42
|
raise "No input. You'd specify input source: filename or .stdin" unless filename
|
@@ -63,12 +64,12 @@ module Macroape
|
|
63
64
|
pvalues = default_pvalues if pvalues.empty?
|
64
65
|
|
65
66
|
if filename == '.stdin'
|
66
|
-
|
67
|
+
input = $stdin.read
|
67
68
|
else
|
68
69
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
69
|
-
|
70
|
+
input = File.read(filename)
|
70
71
|
end
|
71
|
-
|
72
|
+
pwm = data_model.new(input).to_pwm
|
72
73
|
pwm.background!(background).max_hash_size!(max_hash_size).discrete!(discretization)
|
73
74
|
|
74
75
|
pwm.thresholds(*pvalues) do |pvalue, threshold, real_pvalue|
|
@@ -74,7 +74,7 @@ module Macroape
|
|
74
74
|
collection = Macroape::Collection.new(rough_discretization, precise_discretization, background, pvalues)
|
75
75
|
|
76
76
|
if File.directory?(data_source)
|
77
|
-
|
77
|
+
motifs = Dir.glob(File.join(data_source,'*')).map do |filename|
|
78
78
|
pwm = data_model.new(File.read(filename))
|
79
79
|
pwm.name ||= File.basename(filename, File.extname(filename))
|
80
80
|
pwm
|
@@ -36,6 +36,7 @@ module Macroape
|
|
36
36
|
exit
|
37
37
|
end
|
38
38
|
|
39
|
+
data_model = argv.delete('--pcm') ? Bioinform::PCM : Bioinform::PWM
|
39
40
|
filename = argv.shift
|
40
41
|
collection_file = argv.shift
|
41
42
|
raise "No input. You'd specify input source for pat: filename or .stdin" unless filename
|
@@ -82,12 +83,13 @@ module Macroape
|
|
82
83
|
raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue" unless collection.pvalues.include? pvalue
|
83
84
|
|
84
85
|
if filename == '.stdin'
|
85
|
-
|
86
|
+
query_input = $stdin.read
|
86
87
|
else
|
87
88
|
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
|
88
|
-
|
89
|
+
query_input = File.read(filename)
|
89
90
|
end
|
90
91
|
|
92
|
+
query_pwm = data_model.new(query_input).to_pwm
|
91
93
|
query_pwm.background(background_query).max_hash_size(max_hash_size)
|
92
94
|
|
93
95
|
query_pwm_rough = query_pwm.discrete(collection.rough_discretization)
|
data/lib/macroape/version.rb
CHANGED
data/test/eval_alignment_test.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
class TestEvalAlignment < Test::Unit::TestCase
|
4
|
+
def test_process_pcm_files
|
5
|
+
assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm -1 direct --pcm')
|
6
|
+
end
|
7
|
+
|
4
8
|
def test_process_at_optimal_alignment
|
5
9
|
assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')
|
6
10
|
end
|
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
class TestEvalSimilarity < Test::Unit::TestCase
|
4
|
+
def test_process_pair_of_pcms
|
5
|
+
assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm --pcm')
|
6
|
+
end
|
4
7
|
def test_process_pair_of_pwms
|
5
8
|
assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat')
|
6
9
|
end
|
data/test/find_pvalue_test.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
class FindPvalueTest < Test::Unit::TestCase
|
4
|
+
def test_process_pcm
|
5
|
+
assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pcm 4.1719 --pcm')
|
6
|
+
end
|
4
7
|
def test_process_one_threshold
|
5
8
|
assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719')
|
6
9
|
end
|
data/test/find_threshold_test.rb
CHANGED
@@ -10,6 +10,11 @@ class FindThresholdTest < Test::Unit::TestCase
|
|
10
10
|
}
|
11
11
|
assert_equal pvalues, ['0.0005', '0.001']
|
12
12
|
end
|
13
|
+
def test_process_pcm
|
14
|
+
pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pcm -p 0.001 --pcm').strip.split("\t")
|
15
|
+
assert_equal '0.001', pvalue
|
16
|
+
assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
|
17
|
+
end
|
13
18
|
def test_process_one_pvalue
|
14
19
|
pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001').strip.split("\t")
|
15
20
|
assert_equal '0.001', pvalue
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
|
3
3
|
class TestScanCollection < Test::Unit::TestCase
|
4
|
+
def test_scan_pcm
|
5
|
+
assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
|
6
|
+
Helpers.scan_collection_output('test/data/KLF4_f2.pcm test/data/test_collection.yaml --silent --pcm').gsub("\r\n","\n")
|
7
|
+
end
|
4
8
|
def test_scan_default_cutoff
|
5
9
|
assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
|
6
10
|
Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent').gsub("\r\n","\n")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macroape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.4
|
4
|
+
version: 3.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bioinform
|