macroape 4.0.2 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +4 -4
- data/LICENSE +22 -22
- data/README.md +70 -70
- data/Rakefile.rb +49 -49
- data/TODO.txt +46 -46
- data/benchmark/benchmark_helper.rb +4 -4
- data/benchmark/similarity_benchmark.rb +52 -52
- data/bin/align_motifs +4 -4
- data/bin/eval_alignment +4 -4
- data/bin/eval_similarity +4 -4
- data/bin/find_pvalue +4 -4
- data/bin/find_threshold +4 -4
- data/bin/preprocess_collection +4 -4
- data/bin/scan_collection +4 -4
- data/lib/macroape.rb +14 -11
- data/lib/macroape/aligned_pair_intersection.rb +61 -62
- data/lib/macroape/cli.rb +191 -188
- data/lib/macroape/cli/align_motifs.rb +120 -100
- data/lib/macroape/cli/eval_alignment.rb +157 -156
- data/lib/macroape/cli/eval_similarity.rb +138 -137
- data/lib/macroape/cli/find_pvalue.rb +93 -87
- data/lib/macroape/cli/find_threshold.rb +103 -96
- data/lib/macroape/cli/preprocess_collection.rb +169 -161
- data/lib/macroape/cli/scan_collection.rb +171 -163
- data/lib/macroape/collection.rb +29 -0
- data/lib/macroape/motif_with_thresholds.rb +18 -0
- data/lib/macroape/pwm_compare.rb +39 -44
- data/lib/macroape/pwm_compare_aligned.rb +139 -130
- data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
- data/lib/macroape/support/inverf.rb +13 -0
- data/lib/macroape/support/partial_sums.rb +17 -0
- data/lib/macroape/version.rb +4 -4
- data/macroape.gemspec +19 -19
- data/spec/count_distribution_spec.rb +112 -109
- data/spec/inverf_spec.rb +23 -0
- data/spec/partial_sums_spec.rb +28 -0
- data/spec/spec_helper.rb +11 -11
- data/test/align_motifs_test.rb +42 -43
- data/test/data/AHR_si.pwm +10 -10
- data/test/data/KLF3_f1.pcm +16 -16
- data/test/data/KLF3_f1.pwm +16 -16
- data/test/data/KLF4_f2.pcm +11 -11
- data/test/data/KLF4_f2.pwm +11 -11
- data/test/data/KLF4_f2_scan_results_all.txt +2 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
- data/test/data/SP1_f1.pcm +12 -12
- data/test/data/SP1_f1.pwm +12 -12
- data/test/data/SP1_f1_revcomp.pcm +12 -12
- data/test/data/SP1_f1_revcomp.pwm +12 -12
- data/test/data/medium_motif.pwm +8 -8
- data/test/data/short_motif.pwm +7 -7
- data/test/data/test_collection.yaml +231 -214
- data/test/data/test_collection/GABPA_f1.pwm +14 -14
- data/test/data/test_collection/KLF4_f2.pwm +10 -10
- data/test/data/test_collection/SP1_f1.pwm +12 -12
- data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
- data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
- data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
- data/test/data/test_collection_single_file.txt +38 -38
- data/test/data/test_collection_single_file_pcm.txt +37 -37
- data/test/data/test_collection_weak.yaml +231 -214
- data/test/eval_alignment_test.rb +90 -111
- data/test/eval_similarity_test.rb +105 -123
- data/test/find_pvalue_test.rb +34 -39
- data/test/find_threshold_test.rb +87 -91
- data/test/preprocess_collection_test.rb +56 -65
- data/test/scan_collection_test.rb +42 -48
- data/test/test_helper.rb +159 -160
- metadata +14 -10
- data/test/data/collection_pcm_without_thresholds.yaml +0 -188
- data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,163 +1,171 @@
|
|
1
|
-
require_relative '../../macroape'
|
2
|
-
require 'yaml'
|
3
|
-
|
4
|
-
module Macroape
|
5
|
-
module CLI
|
6
|
-
module ScanCollection
|
7
|
-
def self.main(argv)
|
8
|
-
doc = <<-EOS.strip_doc
|
9
|
-
Command-line format:
|
10
|
-
#{run_tool_cmd} <pat-file> <collection> [options]
|
11
|
-
|
12
|
-
Options:
|
13
|
-
[-p <P-value>]
|
14
|
-
[-c <similarity cutoff>] minimal similarity to be included in output, '-c 0.05' by default, [--all] to print all results
|
15
|
-
[--precise [<level>]] minimal similarity to check on the second pass in precise mode, off by default, '--precise 0.01' if level is not set
|
16
|
-
[--silent] - hide current progress information during scan (printed to stderr by default)
|
17
|
-
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
|
18
|
-
[--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
|
19
|
-
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
|
20
|
-
|
21
|
-
Output format:
|
22
|
-
<name> <jaccard index> <shift> <overlap> <orientation> ['*' in case that result was calculated on the second pass (in precise mode), '.' otherwise]
|
23
|
-
Attention! Name can contain whitespace characters.
|
24
|
-
Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.
|
25
|
-
|
26
|
-
Example:
|
27
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat hocomoco_ad_uniform.yaml
|
28
|
-
#{run_tool_cmd} motifs/KLF4_f2.pat hocomoco_ad_uniform.yaml -p 0.0005 --precise 0.03
|
29
|
-
EOS
|
30
|
-
|
31
|
-
if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
|
32
|
-
$stderr.puts doc
|
33
|
-
exit
|
34
|
-
end
|
35
|
-
|
36
|
-
data_model = argv.delete('--pcm') ?
|
37
|
-
filename = argv.shift
|
38
|
-
collection_file = argv.shift
|
39
|
-
raise 'No input. You should specify input file with matrix' unless filename
|
40
|
-
raise 'No input. You should specify input file with collection' unless collection_file
|
41
|
-
raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)
|
42
|
-
|
43
|
-
pvalue = 0.0005
|
44
|
-
cutoff = 0.05 # minimal similarity to output
|
45
|
-
collection = YAML.load_file(collection_file)
|
46
|
-
collection_background = collection.
|
47
|
-
query_background = collection_background
|
48
|
-
|
49
|
-
rough_discretization = collection.
|
50
|
-
precise_discretization = collection.
|
51
|
-
max_hash_size = 10000000
|
52
|
-
max_pair_hash_size = 10000
|
53
|
-
pvalue_boundary = :upper
|
54
|
-
|
55
|
-
silent = false
|
56
|
-
precision_mode = :rough
|
57
|
-
until argv.empty?
|
58
|
-
case argv.shift
|
59
|
-
when '-b'
|
60
|
-
query_background = argv.shift
|
61
|
-
raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless query_background
|
62
|
-
when '-p'
|
63
|
-
pvalue = argv.shift.to_f
|
64
|
-
when '--max-hash-size'
|
65
|
-
max_hash_size = argv.shift.to_i
|
66
|
-
when '--max-2d-hash-size'
|
67
|
-
max_pair_hash_size = argv.shift.to_i
|
68
|
-
when '-c'
|
69
|
-
cutoff = argv.shift.to_f
|
70
|
-
when '--all'
|
71
|
-
cutoff = 0.0
|
72
|
-
when '--silent'
|
73
|
-
silent = true
|
74
|
-
when '--boundary'
|
75
|
-
pvalue_boundary = argv.shift.to_sym
|
76
|
-
raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
|
77
|
-
when '--precise'
|
78
|
-
precision_mode = :precise
|
79
|
-
begin
|
80
|
-
Float(argv.first)
|
81
|
-
minimal_similarity = argv.shift.to_f
|
82
|
-
rescue
|
83
|
-
minimal_similarity = 0.05
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
info = Macroape::PWMCompare.new(
|
138
|
-
info[:precision_mode] = :
|
139
|
-
end
|
140
|
-
info[:
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
1
|
+
require_relative '../../macroape'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
module Macroape
  module CLI
    # Command-line tool that compares a query matrix against every motif of a
    # precomputed collection and prints the motifs similar to the query.
    module ScanCollection
      # Similarity level used for the second (precise) pass when `--precise`
      # is given without an explicit level. Interpolated into the help text so
      # that the documentation and the behavior cannot drift apart.
      DEFAULT_PRECISE_SIMILARITY = 0.05

      # Runs the tool.
      #
      # argv - Array of command-line arguments: <pat-file> <collection> [options].
      #        The array is consumed (mutated) while it is parsed.
      #
      # Prints the resulting table to stdout; progress and diagnostics go to
      # stderr. Any StandardError raised while running is reported on stderr
      # together with the help text instead of being propagated.
      def self.main(argv)
        doc = <<-EOS.strip_doc
          Command-line format:
          #{run_tool_cmd} <pat-file> <collection> [options]

          Options:
            [-p <P-value>]
            [-c <similarity cutoff>] minimal similarity to be included in output, '-c 0.05' by default, [--all] to print all results
            [--precise [<level>]] minimal similarity to check on the second pass in precise mode, off by default, '--precise #{DEFAULT_PRECISE_SIMILARITY}' if level is not set
            [--silent] - hide current progress information during scan (printed to stderr by default)
            [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
            [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value
            [-b <background probabilities>] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25

          Output format:
            <name> <jaccard index> <shift> <overlap> <orientation> ['*' in case that result was calculated on the second pass (in precise mode), '.' otherwise]
              Attention! Name can contain whitespace characters.
              Attention! The shift and orientation are reported for the collection matrix relative to the query matrix.

          Example:
            #{run_tool_cmd} motifs/KLF4_f2.pat hocomoco_ad_uniform.yaml
            #{run_tool_cmd} motifs/KLF4_f2.pat hocomoco_ad_uniform.yaml -p 0.0005 --precise 0.03
        EOS

        if argv.empty? || ['-h', '--h', '-help', '--help'].any? { |help_option| argv.include?(help_option) }
          $stderr.puts doc
          exit
        end

        # `--pcm` may appear anywhere, so it is removed before the positional arguments are read.
        data_model = argv.delete('--pcm') ? :pcm : :pwm
        filename = argv.shift
        collection_file = argv.shift
        raise 'No input. You should specify input file with matrix' unless filename
        raise 'No input. You should specify input file with collection' unless collection_file
        raise "Collection file #{collection_file} doesn't exist" unless File.exist?(collection_file)

        pvalue = 0.0005
        cutoff = 0.05 # minimal similarity to output

        # NOTE(review): YAML.load_file may instantiate arbitrary objects depending on the
        # installed YAML library version; only load collection files from trusted sources.
        collection = YAML.load_file(collection_file)
        collection_background = collection.background
        # The query shares the collection background unless `-b` overrides it.
        query_background = collection_background

        rough_discretization = collection.rough_discretization
        precise_discretization = collection.precise_discretization
        max_hash_size = 10000000
        max_pair_hash_size = 10000
        pvalue_boundary = :upper

        silent = false
        precision_mode = :rough
        # Only meaningful in precise mode; stays nil unless `--precise` is given.
        minimal_similarity = nil

        until argv.empty?
          case argv.shift
          when '-b'
            query_background = Bioinform::Background.from_string(argv.shift)
            raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless query_background.symmetric?
          when '-p'
            pvalue = argv.shift.to_f
          when '--max-hash-size'
            max_hash_size = argv.shift.to_i
          when '--max-2d-hash-size'
            max_pair_hash_size = argv.shift.to_i
          when '-c'
            cutoff = argv.shift.to_f
          when '--all'
            cutoff = 0.0
          when '--silent'
            silent = true
          when '--boundary'
            pvalue_boundary = argv.shift.to_sym
            raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper
          when '--precise'
            precision_mode = :precise
            begin
              # Float() only probes whether the next argument is a number;
              # it is consumed as the level only when the probe succeeds.
              Float(argv.first)
              minimal_similarity = argv.shift.to_f
            rescue ArgumentError, TypeError
              # Next argument is missing (TypeError) or is not a number (ArgumentError).
              minimal_similarity = DEFAULT_PRECISE_SIMILARITY
            end
          end
        end

        unless collection.pvalues.include?(pvalue)
          raise "Thresholds for pvalue #{pvalue} aren't presented in collection (#{collection.pvalues.join(', ')}). Use one of listed pvalues or recalculate the collection with needed pvalue"
        end

        raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
        query_input = File.read(filename)

        query_input = Bioinform::MatrixParser.new.parse!(query_input)
        case data_model
        when :pcm
          query_pcm = Bioinform::MotifModel::PCM.new(query_input[:matrix]).named(query_input[:name])
          query_pwm = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: query_background).convert(query_pcm)
        when :pwm
          query_pwm = Bioinform::MotifModel::PWM.new(query_input[:matrix]).named(query_input[:name])
        end

        query_pwm_rough = query_pwm.discreted(rough_discretization)
        query_pwm_rough_counting = Macroape::PWMCounting.new(query_pwm_rough, background: query_background, max_hash_size: max_hash_size)
        query_pwm_precise = query_pwm.discreted(precise_discretization)
        query_pwm_precise_counting = Macroape::PWMCounting.new(query_pwm_precise, background: query_background, max_hash_size: max_hash_size)

        if pvalue_boundary == :lower
          query_threshold_rough, query_rough_real_pvalue = query_pwm_rough_counting.threshold_and_real_pvalue(pvalue)
          query_threshold_precise, query_precise_real_pvalue = query_pwm_precise_counting.threshold_and_real_pvalue(pvalue)
        else
          query_threshold_rough, query_rough_real_pvalue = query_pwm_rough_counting.weak_threshold_and_real_pvalue(pvalue)
          query_threshold_precise, query_precise_real_pvalue = query_pwm_precise_counting.weak_threshold_and_real_pvalue(pvalue)
        end

        if query_precise_real_pvalue == 0
          $stderr.puts "Query motif #{query_pwm.name} gives 0 recognized words for a given P-value of #{pvalue} with the precise discretization level of #{precise_discretization}. It's impossible to scan collection for this motif"
          return
        end

        if query_rough_real_pvalue == 0
          # The rough model recognizes nothing, so the precise model stands in for it on the first pass.
          query_pwm_rough_counting, query_threshold_rough = query_pwm_precise_counting, query_threshold_precise
          $stderr.puts "Query motif #{query_pwm.name} gives 0 recognized words for a given P-value of #{pvalue} with the rough discretization level of #{rough_discretization}. Forcing precise discretization level of #{precise_discretization}"
        end

        similarities = {}

        collection.motifs.each_with_index do |motif_info, index|
          motif = motif_info.model
          $stderr.puts "Testing motif #{motif.name} (#{index+1} of #{collection.size}, #{index*100/collection.size}% complete)" unless silent

          # First pass: rough discretization, possible only when the collection stores a rough threshold.
          if motif_info.rough[pvalue]
            collection_pwm_rough = motif.discreted(rough_discretization)
            collection_pwm_rough_counting = Macroape::PWMCounting.new(collection_pwm_rough, background: collection_background, max_hash_size: max_hash_size)

            collection_threshold_rough = motif_info.rough[pvalue] * rough_discretization
            info = Macroape::PWMCompare.new(query_pwm_rough_counting, collection_pwm_rough_counting).tap { |x| x.max_pair_hash_size = max_pair_hash_size }.jaccard(query_threshold_rough, collection_threshold_rough)
            info[:precision_mode] = :rough
          end

          # Second pass: precise discretization, either because no rough threshold exists
          # or because precise mode is on and the rough similarity is high enough.
          if !motif_info.rough[pvalue] || ((precision_mode == :precise) && (info[:similarity] >= minimal_similarity))
            collection_pwm_precise = motif.discreted(precise_discretization)
            collection_pwm_precise_counting = Macroape::PWMCounting.new(collection_pwm_precise, background: collection_background, max_hash_size: max_hash_size)

            collection_threshold_precise = motif_info.precise[pvalue] * precise_discretization
            info = Macroape::PWMCompare.new(query_pwm_precise_counting, collection_pwm_precise_counting).tap { |x| x.max_pair_hash_size = max_pair_hash_size }.jaccard(query_threshold_precise, collection_threshold_precise)
            info[:precision_mode] = :precise
          end
          info[:name] = motif.name
          similarities[motif.name] = info
        end

        $stderr.puts "100% complete" unless silent

        # Most similar motifs first; motifs below the cutoff are dropped.
        similarities_to_output = similarities.sort_by { |name, info| info[:similarity] }.reverse.select { |name, info| info[:similarity] >= cutoff }.map { |name, info| info }
        puts Helper.scan_collection_infos_string( similarities_to_output,
                                                  {cutoff: cutoff,
                                                  precision_mode: precision_mode,
                                                  rough_discretization: rough_discretization,
                                                  precise_discretization: precise_discretization,
                                                  minimal_similarity: minimal_similarity,
                                                  pvalue: pvalue,
                                                  pvalue_boundary: pvalue_boundary,
                                                  collection_background: collection_background,
                                                  query_background: query_background} )
      rescue => err
        $stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
      end

    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Macroape
  # A set of motifs together with the parameters shared by the whole set:
  # the two discretization levels, the background and the list of P-values.
  class Collection
    # Attributes that take part in equality comparison.
    COMPARED_ATTRIBUTES = [:motifs, :rough_discretization, :precise_discretization, :background, :pvalues].freeze

    attr_accessor :motifs, :rough_discretization, :precise_discretization, :background, :pvalues

    # options - Hash with optional keys :motifs (defaults to an empty Array),
    #           :rough_discretization, :precise_discretization, :background
    #           and :pvalues (each defaults to nil).
    def initialize(options = {})
      @motifs = options[:motifs] || []
      @rough_discretization = options[:rough_discretization]
      @precise_discretization = options[:precise_discretization]
      @background = options[:background]
      @pvalues = options[:pvalues]
    end

    # Two collections are equal when all their attributes are equal.
    # Returns false (instead of raising NoMethodError) when `other` does not
    # expose the compared attributes, e.g. nil or an unrelated object.
    def ==(other)
      COMPARED_ATTRIBUTES.all? do |attribute|
        other.respond_to?(attribute) && (public_send(attribute) == other.public_send(attribute))
      end
    end

    # Appends a motif (with its thresholds) to the collection.
    # Returns the underlying motifs Array.
    def <<(motif_with_thresholds)
      @motifs << motif_with_thresholds
    end

    # Number of motifs in the collection.
    def size
      motifs.size
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Macroape
  # A motif model bundled with its precomputed thresholds for the rough and
  # the precise discretization levels.
  class MotifWithThresholds
    attr_accessor :model
    attr_accessor :rough, :precise

    # model   - the motif model itself.
    # options - Hash with optional keys :rough and :precise holding the
    #           thresholds for the corresponding discretization (nil when absent).
    def initialize(model, options = {})
      @model = model
      @rough = options[:rough]
      @precise = options[:precise]
    end

    # Equal when model and both threshold sets are equal.
    # Returns false (instead of raising NoMethodError) when `other` does not
    # expose these attributes, e.g. nil or an unrelated object.
    def ==(other)
      [:model, :rough, :precise].all? do |attribute|
        other.respond_to?(attribute) && (public_send(attribute) == other.public_send(attribute))
      end
    end
  end
end
|
data/lib/macroape/pwm_compare.rb
CHANGED
@@ -1,44 +1,39 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
include Enumerable
|
41
|
-
alias_method :each, :each_alignment
|
42
|
-
alias_method :map_each_alignment, :map
|
43
|
-
end
|
44
|
-
end
|
1
|
+
module Macroape
  # Compares two models over every relative alignment (shift and orientation)
  # and reports the alignment with the highest similarity.
  class PWMCompare
    include Enumerable

    # sets or gets limit of summary size of calculation hash. It's a defence against overuse CPU resources by non-appropriate data
    attr_accessor :max_pair_hash_size
    attr_reader :first, :second

    # first, second - the two models to compare; both must respond to `length`.
    def initialize(first, second)
      @first = first
      @second = second
    end

    # Evaluates every alignment at the given thresholds and returns the infos
    # Hash (alignment infos merged with the jaccard result) of the alignment
    # with the greatest :similarity.
    def jaccard(threshold_first, threshold_second)
      map_each_alignment do |alignment|
        alignment.alignment_infos.merge(alignment.jaccard(threshold_first, threshold_second))
      end.max_by { |alignment_infos| alignment_infos[:similarity] }
    end

    # Same as #jaccard, with thresholds obtained from each model's `threshold(pvalue)`.
    def jaccard_by_pvalue(pvalue)
      threshold_first = first.threshold(pvalue)
      threshold_second = second.threshold(pvalue)
      jaccard(threshold_first, threshold_second)
    end

    # Same as #jaccard, with thresholds obtained from each model's `weak_threshold(pvalue)`.
    def jaccard_by_weak_pvalue(pvalue)
      threshold_first = first.weak_threshold(pvalue)
      threshold_second = second.weak_threshold(pvalue)
      jaccard(threshold_first, threshold_second)
    end

    # Yields a PWMCompareAligned for every shift in -second.length..first.length
    # combined with both orientations (:direct and :revcomp), passing on
    # max_pair_hash_size to each of them.
    # Returns an Enumerator when called without a block, as Enumerable
    # collections conventionally do (previously this raised LocalJumpError).
    def each_alignment
      return enum_for(:each_alignment) unless block_given?

      (-second.length..first.length).to_a.product([:direct, :revcomp]) do |shift, orientation|
        yield PWMCompareAligned.new(first, second, shift, orientation).tap { |x| x.max_pair_hash_size = max_pair_hash_size }
      end
    end

    alias_method :each, :each_alignment
    alias_method :map_each_alignment, :map
  end
end
|