bioinform 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +84 -0
- data/TODO.txt +5 -1
- data/lib/bioinform/background.rb +2 -0
- data/lib/bioinform/cli/convert_motif.rb +3 -4
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +6 -5
- data/lib/bioinform/data_models/pcm.rb +25 -11
- data/lib/bioinform/data_models/pm.rb +34 -31
- data/lib/bioinform/data_models/ppm.rb +22 -5
- data/lib/bioinform/data_models/pwm.rb +12 -4
- data/lib/bioinform/errors.rb +8 -1
- data/lib/bioinform/validator.rb +86 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +5 -5
- data/spec/background_spec.rb +4 -4
- data/spec/cli/cli_spec.rb +19 -0
- data/spec/cli/pcm2pwm_spec.rb +10 -10
- data/spec/converters/pcm2ppm_converter_spec.rb +4 -4
- data/spec/converters/pcm2pwm_converter_spec.rb +16 -6
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +5 -5
- data/spec/converters/pwm2pcm_converter_spec.rb +3 -3
- data/spec/data_models/pcm_spec.rb +7 -4
- data/spec/data_models/pm_spec.rb +5 -5
- data/spec/data_models/ppm_spec.rb +4 -4
- data/spec/data_models/pwm_spec.rb +4 -3
- data/spec/formatters/consensus_formatter_spec.rb +1 -1
- data/spec/formatters/raw_formatter_spec.rb +2 -2
- data/spec/parsers/matrix_parser_spec.rb +8 -8
- data/spec/spec_helper_source.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64c5e6fe652d85bd538d08020e5d06779e676fa7
|
4
|
+
data.tar.gz: 954a02afb784d086d006e59e97f57400a10f7858
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3e549ad3b7b20a45ba37dc90c25660af7f1a455cf358aced90991570b76070da5688872141a32e54590a6cd0e65622fe2010b9b27b34466886b15f5a6b7856e
|
7
|
+
data.tar.gz: 81e3dba3f91e3f4acc6838efe5e197da2712f070a4962e3d29bd0df81a0834eb513542ecfcbaca62802cd32fdaa90e883c356d1a008abde9715e5183e52501ee
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bioinform (0.3.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
coderay (1.1.0)
|
10
|
+
diff-lcs (1.2.5)
|
11
|
+
fakefs (0.4.3)
|
12
|
+
ffi (1.9.10)
|
13
|
+
formatador (0.2.5)
|
14
|
+
given_core (3.7.1)
|
15
|
+
sorcerer (>= 0.3.7)
|
16
|
+
guard (2.13.0)
|
17
|
+
formatador (>= 0.2.4)
|
18
|
+
listen (>= 2.7, <= 4.0)
|
19
|
+
lumberjack (~> 1.0)
|
20
|
+
nenv (~> 0.1)
|
21
|
+
notiffany (~> 0.0)
|
22
|
+
pry (>= 0.9.12)
|
23
|
+
shellany (~> 0.0)
|
24
|
+
thor (>= 0.18.1)
|
25
|
+
guard-compat (1.2.1)
|
26
|
+
guard-rspec (4.6.4)
|
27
|
+
guard (~> 2.1)
|
28
|
+
guard-compat (~> 1.1)
|
29
|
+
rspec (>= 2.99.0, < 4.0)
|
30
|
+
listen (3.0.5)
|
31
|
+
rb-fsevent (>= 0.9.3)
|
32
|
+
rb-inotify (>= 0.9)
|
33
|
+
lumberjack (1.0.9)
|
34
|
+
method_source (0.8.2)
|
35
|
+
nenv (0.2.0)
|
36
|
+
notiffany (0.0.8)
|
37
|
+
nenv (~> 0.1)
|
38
|
+
shellany (~> 0.0)
|
39
|
+
pry (0.10.3)
|
40
|
+
coderay (~> 1.1.0)
|
41
|
+
method_source (~> 0.8.1)
|
42
|
+
slop (~> 3.4)
|
43
|
+
rake (10.4.2)
|
44
|
+
rb-fsevent (0.9.6)
|
45
|
+
rb-inotify (0.9.5)
|
46
|
+
ffi (>= 0.5.0)
|
47
|
+
rspec (3.4.0)
|
48
|
+
rspec-core (~> 3.4.0)
|
49
|
+
rspec-expectations (~> 3.4.0)
|
50
|
+
rspec-mocks (~> 3.4.0)
|
51
|
+
rspec-core (3.4.1)
|
52
|
+
rspec-support (~> 3.4.0)
|
53
|
+
rspec-expectations (3.4.0)
|
54
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
55
|
+
rspec-support (~> 3.4.0)
|
56
|
+
rspec-given (3.7.1)
|
57
|
+
given_core (= 3.7.1)
|
58
|
+
rspec (>= 2.14.0)
|
59
|
+
rspec-mocks (3.4.0)
|
60
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
61
|
+
rspec-support (~> 3.4.0)
|
62
|
+
rspec-support (3.4.1)
|
63
|
+
shellany (0.0.1)
|
64
|
+
slop (3.6.0)
|
65
|
+
sorcerer (1.0.2)
|
66
|
+
spork (0.9.2)
|
67
|
+
thor (0.19.1)
|
68
|
+
wdm (0.1.1)
|
69
|
+
|
70
|
+
PLATFORMS
|
71
|
+
ruby
|
72
|
+
|
73
|
+
DEPENDENCIES
|
74
|
+
bioinform!
|
75
|
+
fakefs (~> 0.4.2)
|
76
|
+
guard-rspec (>= 2.1.0)
|
77
|
+
rake (~> 10.4)
|
78
|
+
rspec (~> 3.0)
|
79
|
+
rspec-given (>= 2.0.0)
|
80
|
+
spork (>= 0.9.2)
|
81
|
+
wdm
|
82
|
+
|
83
|
+
BUNDLED WITH
|
84
|
+
1.11.2
|
data/TODO.txt
CHANGED
@@ -32,4 +32,8 @@ Decide:
|
|
32
32
|
Specs
|
33
33
|
-- PWM#probabilities, #score_variance, #gauss_estimation
|
34
34
|
|
35
|
-
background#to_s and WordwiseBackground#to_s
|
35
|
+
background#to_s and WordwiseBackground#to_s
|
36
|
+
|
37
|
+
SNP should be indexed both by sequence start and relative to snp position
|
38
|
+
|
39
|
+
Интеграция PCM/PWM с iRuby: вывод лого в форме svg/png, печать матриц в tex-формате
|
data/lib/bioinform/background.rb
CHANGED
@@ -50,6 +50,8 @@ module Bioinform
|
|
50
50
|
include Bioinform::Background
|
51
51
|
def initialize(frequencies)
|
52
52
|
@frequencies = frequencies
|
53
|
+
raise Error, 'Frequencies should have 4 components' unless frequencies.length == 4
|
54
|
+
raise Error, 'Frequencies should be in [0;1]' unless frequencies.all?{|el| (0..1).include?(el) }
|
53
55
|
raise Error, 'Sum of Background frequencies should be equal to 1' unless (frequencies.inject(0.0, &:+) - 1.0).abs < 1e-4
|
54
56
|
end
|
55
57
|
|
@@ -27,14 +27,13 @@ module Bioinform
|
|
27
27
|
output_motifs = []
|
28
28
|
motifs = motif_files.map do |filename|
|
29
29
|
input = File.read(filename)
|
30
|
-
motif_info = MotifParser.new.parse(input)
|
31
30
|
case options[:model_from]
|
32
31
|
when 'pwm'
|
33
|
-
MotifModel::PWM.
|
32
|
+
MotifModel::PWM.from_string(input)
|
34
33
|
when 'pcm'
|
35
|
-
MotifModel::PCM.
|
34
|
+
MotifModel::PCM.from_string(input)
|
36
35
|
when 'ppm'
|
37
|
-
MotifModel::PPM.
|
36
|
+
MotifModel::PPM.from_string(input)
|
38
37
|
else
|
39
38
|
raise "Unknown value of model-from parameter: `#{options[:model_from]}`"
|
40
39
|
end
|
@@ -12,16 +12,17 @@ module Bioinform
|
|
12
12
|
@pseudocount = options.fetch(:pseudocount, :log)
|
13
13
|
end
|
14
14
|
|
15
|
-
def calculate_pseudocount(
|
15
|
+
def calculate_pseudocount(pos)
|
16
16
|
case @pseudocount
|
17
17
|
when Numeric
|
18
18
|
@pseudocount
|
19
19
|
when :log
|
20
|
-
|
20
|
+
count = pos.inject(0.0, &:+)
|
21
|
+
Math.log([count, 2].max)
|
21
22
|
when :sqrt
|
22
|
-
Math.sqrt(pcm.
|
23
|
+
Math.sqrt(pcm.inject(0.0, &:+))
|
23
24
|
when Proc
|
24
|
-
@pseudocount.call(
|
25
|
+
@pseudocount.call(pos)
|
25
26
|
else
|
26
27
|
raise Error, 'Unknown pseudocount type use numeric or :log or :sqrt or Proc with taking pcm parameter'
|
27
28
|
end
|
@@ -29,8 +30,8 @@ module Bioinform
|
|
29
30
|
|
30
31
|
def convert(pcm)
|
31
32
|
raise Error, "#{self.class}#convert accepts only models acting as PCM" unless MotifModel.acts_as_pcm?(pcm)
|
32
|
-
actual_pseudocount = calculate_pseudocount(pcm)
|
33
33
|
matrix = pcm.each_position.map do |pos|
|
34
|
+
actual_pseudocount = calculate_pseudocount(pos)
|
34
35
|
count = pos.inject(0.0, &:+)
|
35
36
|
pos.each_index.map do |index|
|
36
37
|
Math.log((pos[index] + @background.frequencies[index] * actual_pseudocount).to_f / (@background.frequencies[index]*(count + actual_pseudocount)) )
|
@@ -7,20 +7,34 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PCM < PM
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def self.count_validator(eps: 1.0e-4)
|
11
|
+
Validator.new{|matrix, alphabet|
|
12
|
+
errors = []
|
13
|
+
unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
14
|
+
errors << "Elements of PCM should be non-negative."
|
15
|
+
end
|
16
|
+
|
17
|
+
warnings = []
|
18
|
+
if eps
|
19
|
+
counts = matrix.map{|pos| pos.inject(0.0, &:+) }
|
20
|
+
unless (counts.max - counts.min) <= eps * counts.min
|
21
|
+
warnings << "PCM counts are different (discrepancy is greater than eps * MinCount; eps=#{eps}; MinCountn=#{counts.min})."
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
ValidationResult.new(errors: errors, warnings: warnings)
|
26
|
+
}
|
14
27
|
end
|
15
28
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
count
|
29
|
+
VALIDATOR = PM::VALIDATOR * PCM.count_validator(eps: 1.0e-4).make_strict
|
30
|
+
DIFFERENT_COUNTS_VALIDATOR = PM::VALIDATOR * PCM.count_validator(eps: nil).make_strict
|
31
|
+
|
32
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PCM::VALIDATOR)
|
33
|
+
super
|
34
|
+
# validator already checked count discrepancy. We store median count.
|
35
|
+
@count = matrix.map{|pos| pos.inject(0.0, &:+) }.sort[matrix.length / 2]
|
23
36
|
end
|
37
|
+
attr_reader :count
|
24
38
|
end
|
25
39
|
end
|
26
40
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../formatters/motif_formatter'
|
2
|
+
require_relative '../validator'
|
2
3
|
require_relative '../errors'
|
3
4
|
require_relative '../alphabet'
|
4
5
|
require_relative 'named_model'
|
@@ -6,47 +7,39 @@ require_relative 'named_model'
|
|
6
7
|
module Bioinform
|
7
8
|
module MotifModel
|
8
9
|
class PM
|
10
|
+
DEFAULT_PARSER = MatrixParser.new
|
11
|
+
TRIVIAL_VALIDATOR = Validator.new{|matrix, alphabet| ValidationResult.all_ok }
|
12
|
+
VALIDATOR = Validator.new{|matrix, alphabet|
|
13
|
+
errors = []
|
14
|
+
errors << "Matrix should be an Array." unless matrix.is_a? Array
|
15
|
+
errors << "Matrix shouldn't be empty." unless matrix.size > 0
|
16
|
+
errors << "Each matrix position should be an Array." unless matrix.all?{|pos| pos.is_a?(Array) }
|
17
|
+
errors << "Each matrix position should be of size compatible with alphabet (=#{alphabet.size})." unless matrix.all?{|pos| pos.size == alphabet.size }
|
18
|
+
errors << "Each matrix element should be Numeric." unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
19
|
+
ValidationResult.new(errors: errors)
|
20
|
+
}
|
21
|
+
|
9
22
|
attr_reader :matrix, :alphabet
|
10
|
-
def initialize(matrix,
|
23
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PM::VALIDATOR)
|
24
|
+
validation_results = validator.validate_params(matrix, alphabet)
|
25
|
+
unless validation_results.valid?
|
26
|
+
raise ValidationError.new('Invalid matrix.', validation_errors: validation_results)
|
27
|
+
end
|
11
28
|
@matrix = matrix
|
12
|
-
@alphabet =
|
13
|
-
raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
|
29
|
+
@alphabet = alphabet
|
14
30
|
end
|
15
31
|
|
16
|
-
def self.from_string(input,
|
17
|
-
parser = options.fetch(:parser, MatrixParser.new)
|
18
|
-
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
32
|
+
def self.from_string(input, alphabet: NucleotideAlphabet, parser: DEFAULT_PARSER)
|
19
33
|
info = parser.parse!(input)
|
20
34
|
self.new(info[:matrix], alphabet: alphabet).named( info[:name] )
|
21
35
|
end
|
22
36
|
|
23
|
-
def self.from_file(filename,
|
24
|
-
parser = options.fetch(:parser, MatrixParser.new)
|
25
|
-
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
37
|
+
def self.from_file(filename, alphabet: NucleotideAlphabet, parser: DEFAULT_PARSER)
|
26
38
|
info = parser.parse!(File.read(filename))
|
27
39
|
name = (info[:name] && !info[:name].strip.empty?) ? info[:name] : File.basename(filename, File.extname(filename))
|
28
40
|
self.new(info[:matrix], alphabet: alphabet).named( name )
|
29
41
|
end
|
30
42
|
|
31
|
-
def validation_errors
|
32
|
-
errors = []
|
33
|
-
errors << "matrix should be an Array" unless matrix.is_a? Array
|
34
|
-
errors << "matrix shouldn't be empty" unless matrix.size > 0
|
35
|
-
errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
|
36
|
-
errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all?{|pos| pos.size == alphabet.size }
|
37
|
-
errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
38
|
-
errors
|
39
|
-
end
|
40
|
-
private :validation_errors
|
41
|
-
|
42
|
-
def valid?
|
43
|
-
validation_errors.empty?
|
44
|
-
rescue
|
45
|
-
false
|
46
|
-
end
|
47
|
-
|
48
|
-
private :valid?
|
49
|
-
|
50
43
|
def length
|
51
44
|
matrix.size
|
52
45
|
end
|
@@ -68,15 +61,15 @@ module Bioinform
|
|
68
61
|
end
|
69
62
|
|
70
63
|
def reversed
|
71
|
-
self.class.new(matrix.reverse, alphabet: alphabet)
|
64
|
+
self.class.new(matrix.reverse, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
72
65
|
end
|
73
66
|
|
74
67
|
def complemented
|
75
|
-
self.class.new(complement_matrix, alphabet: alphabet)
|
68
|
+
self.class.new(complement_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
76
69
|
end
|
77
70
|
|
78
71
|
def reverse_complemented
|
79
|
-
self.class.new(complement_matrix.reverse, alphabet: alphabet)
|
72
|
+
self.class.new(complement_matrix.reverse, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
80
73
|
end
|
81
74
|
|
82
75
|
alias_method :revcomp, :reverse_complemented
|
@@ -88,6 +81,16 @@ module Bioinform
|
|
88
81
|
end
|
89
82
|
private :complement_matrix
|
90
83
|
|
84
|
+
def rounded(precision: 0)
|
85
|
+
return self if !precision
|
86
|
+
rounded_matrix = matrix.map{|pos|
|
87
|
+
pos.map{|el|
|
88
|
+
el.round(precision)
|
89
|
+
}
|
90
|
+
}
|
91
|
+
self.class.new(rounded_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
92
|
+
end
|
93
|
+
|
91
94
|
# def consensus
|
92
95
|
# ConsensusFormatter.by_maximal_elements.format_string(self)
|
93
96
|
# end
|
@@ -7,11 +7,28 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PPM < PM
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
def self.probability_validator(eps: 1.0e-4)
|
11
|
+
Validator.new{|matrix, alphabet|
|
12
|
+
errors = []
|
13
|
+
unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
14
|
+
errors << "Elements of PPM should be non-negative."
|
15
|
+
end
|
16
|
+
|
17
|
+
warnings = []
|
18
|
+
probability_sums = matrix.map{|pos| pos.inject(0.0, &:+) }
|
19
|
+
max_discrepancy = probability_sums.map{|sum| (sum - 1.0).abs }.max
|
20
|
+
unless max_discrepancy <= eps
|
21
|
+
warnings << "PPM should sum up to 1, with discrepancy not greater than #{eps}."
|
22
|
+
end
|
23
|
+
|
24
|
+
ValidationResult.new(errors: errors, warnings: warnings)
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
VALIDATOR = PM::VALIDATOR * PPM.probability_validator(eps: 1.0e-4).make_strict
|
29
|
+
|
30
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PPM::VALIDATOR)
|
31
|
+
super # default validator redefined
|
15
32
|
end
|
16
33
|
end
|
17
34
|
end
|
@@ -7,6 +7,11 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PWM < PM
|
10
|
+
VALIDATOR = PM::VALIDATOR
|
11
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PWM::VALIDATOR)
|
12
|
+
super # default validator redefined
|
13
|
+
end
|
14
|
+
|
10
15
|
def score(word)
|
11
16
|
raise Error, 'Word length should be the same as PWM length' unless word.length == length
|
12
17
|
length.times.map do |pos|
|
@@ -14,10 +19,13 @@ module Bioinform
|
|
14
19
|
end.inject(0.0, &:+)
|
15
20
|
end
|
16
21
|
|
17
|
-
def discreted(rate,
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
def discreted(rate, rounding_method: :ceil)
|
23
|
+
discreted_matrix = matrix.map{|position|
|
24
|
+
position.map{|element|
|
25
|
+
(element * rate).send(rounding_method)
|
26
|
+
}
|
27
|
+
}
|
28
|
+
self.class.new(discreted_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
21
29
|
end
|
22
30
|
|
23
31
|
def zero_column
|
data/lib/bioinform/errors.rb
CHANGED
@@ -11,7 +11,14 @@ module Bioinform
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_s
|
14
|
-
|
14
|
+
case @validation_errors
|
15
|
+
when Array
|
16
|
+
"#{super} (#{@validation_errors.join('; ')})"
|
17
|
+
when ValidationResult
|
18
|
+
"#{super}\n#{@validation_errors}"
|
19
|
+
else
|
20
|
+
"#{super} (#{@validation_errors})"
|
21
|
+
end
|
15
22
|
end
|
16
23
|
end
|
17
24
|
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Bioinform
|
2
|
+
class ValidationResult
|
3
|
+
attr_reader :errors, :warnings
|
4
|
+
def initialize(errors: [], warnings: [])
|
5
|
+
@errors = errors.freeze
|
6
|
+
@warnings = warnings.freeze
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.all_ok
|
10
|
+
self.new(errors: [], warnings: [])
|
11
|
+
end
|
12
|
+
|
13
|
+
def valid?
|
14
|
+
errors.empty?
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_s
|
18
|
+
msg = ""
|
19
|
+
|
20
|
+
if errors && !errors.empty?
|
21
|
+
msg += "Errors:\n" + errors.join("\n") + "\n"
|
22
|
+
end
|
23
|
+
|
24
|
+
if warnings && !warnings.empty?
|
25
|
+
msg += "Warnings:\n" + warnings.join("\n")
|
26
|
+
end
|
27
|
+
|
28
|
+
msg.empty? ? "{No errors, no warnings}" : "{#{msg}}"
|
29
|
+
end
|
30
|
+
|
31
|
+
def with_errors(additional_errors)
|
32
|
+
ValidationResult.new(errors: errors + additional_errors, warnings: warnings)
|
33
|
+
end
|
34
|
+
|
35
|
+
def with_warnings(additional_warnings)
|
36
|
+
ValidationResult.new(errors: errors, warnings: warnings + additional_warnings)
|
37
|
+
end
|
38
|
+
|
39
|
+
# errors from both operands
|
40
|
+
def +(other)
|
41
|
+
ValidationResult.new(errors: errors + other.errors, warnings: warnings + other.warnings)
|
42
|
+
end
|
43
|
+
|
44
|
+
def hash
|
45
|
+
[@errors, @warnings].hash
|
46
|
+
end
|
47
|
+
def eql?(other)
|
48
|
+
(other.class == self.class) && (errors == other.errors) && (warnings == other.warnings)
|
49
|
+
end
|
50
|
+
def ==(other)
|
51
|
+
other.is_a?(ValidationResult) && (errors == other.errors) && (warnings == other.warnings)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class Validator
|
56
|
+
def initialize(&block)
|
57
|
+
if block_given?
|
58
|
+
@validation_block = block
|
59
|
+
else
|
60
|
+
@validation_block = ->(*args, &b){ ValidationResult.new }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def validate_params(*args, &block)
|
65
|
+
@validation_block.call(*args, &block)
|
66
|
+
rescue => e
|
67
|
+
msg = "Unexpected error occured during validation: #{e.message}. Backtrace:\n" + e.backtrace.join("\n")
|
68
|
+
ValidationResult.new(errors: [msg])
|
69
|
+
end
|
70
|
+
|
71
|
+
# Validate both
|
72
|
+
def *(other)
|
73
|
+
Validator.new{|*args, &b|
|
74
|
+
validate_params(*args, &b) + other.validate_params(*args, &b)
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
# treat warnings as errors
|
79
|
+
def make_strict
|
80
|
+
Validator.new{|*args, &block|
|
81
|
+
result = self.validate_params(*args, &block)
|
82
|
+
ValidationResult.new(errors: result.errors + result.warnings)
|
83
|
+
}
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
data/lib/bioinform/version.rb
CHANGED
data/spec/alphabet_spec.rb
CHANGED
@@ -2,9 +2,9 @@ require 'bioinform/alphabet'
|
|
2
2
|
|
3
3
|
describe Bioinform::ComplementableAlphabet do
|
4
4
|
specify "should raise if complement's complement is not original letter" do
|
5
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A]) }.to raise_error
|
6
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:B,:C], [:C,:B,:B,:A]) }.to raise_error
|
7
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A,:C]) }.to raise_error
|
5
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A]) }.to raise_error(Bioinform::Error)
|
6
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:B,:C], [:C,:B,:B,:A]) }.to raise_error(Bioinform::Error)
|
7
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A,:C]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
context 'usage with alphabet non-symbolized, non-upcased' do
|
@@ -48,10 +48,10 @@ describe Bioinform::NucleotideAlphabet do
|
|
48
48
|
specify { expect( Bioinform::NucleotideAlphabet.size ).to eq 4 }
|
49
49
|
specify { expect( Bioinform::NucleotideAlphabet.complement_letter(:A) ).to eq :T }
|
50
50
|
specify { expect( Bioinform::NucleotideAlphabet.complement_letter(:C) ).to eq :G }
|
51
|
-
specify { expect{ Bioinform::NucleotideAlphabet.complement_letter(:N) }.to raise_error
|
51
|
+
specify { expect{ Bioinform::NucleotideAlphabet.complement_letter(:N) }.to raise_error(Bioinform::Error) }
|
52
52
|
|
53
53
|
specify { expect(Bioinform::NucleotideAlphabet.complement_index(0)).to eq 3 }
|
54
|
-
specify { expect{Bioinform::NucleotideAlphabet.complement_index(4)}.to raise_error
|
54
|
+
specify { expect{Bioinform::NucleotideAlphabet.complement_index(4)}.to raise_error(Bioinform::Error)}
|
55
55
|
end
|
56
56
|
|
57
57
|
describe Bioinform::NucleotideAlphabetWithN do
|
data/spec/background_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'bioinform/background'
|
2
2
|
|
3
3
|
describe Bioinform::Frequencies do
|
4
|
-
specify { expect{Bioinform::Frequencies.new([1,1,1,1]) }.to raise_error
|
5
|
-
specify { expect{Bioinform::Frequencies.new([0.3,0.3,0.3,0.3]) }.to raise_error
|
4
|
+
specify { expect{Bioinform::Frequencies.new([1,1,1,1]) }.to raise_error(Bioinform::Error) }
|
5
|
+
specify { expect{Bioinform::Frequencies.new([0.3,0.3,0.3,0.3]) }.to raise_error(Bioinform::Error) }
|
6
6
|
specify { expect{Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }.not_to raise_error }
|
7
7
|
specify { expect{Bioinform::Frequencies.new([0.2,0.3,0.3,0.2]) }.not_to raise_error }
|
8
8
|
|
@@ -52,6 +52,6 @@ describe Bioinform::Background do
|
|
52
52
|
specify { expect(Bioinform::Background.from_string('uniform')).to eq Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }
|
53
53
|
specify { expect(Bioinform::Background.from_string('UNIFORM')).to eq Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }
|
54
54
|
specify { expect(Bioinform::Background.from_string('wordwise')).to eq Bioinform::WordwiseBackground.new }
|
55
|
-
specify { expect{Bioinform::Background.from_string('0.25,0.25,0.25')}.to raise_error
|
56
|
-
specify { expect{Bioinform::Background.from_string('unifromm')}.to raise_error
|
55
|
+
specify { expect{Bioinform::Background.from_string('0.25,0.25,0.25')}.to raise_error(Bioinform::Error) }
|
56
|
+
specify { expect{Bioinform::Background.from_string('unifromm')}.to raise_error(Bioinform::Error) }
|
57
57
|
end
|
data/spec/cli/cli_spec.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
require 'bioinform/cli'
|
3
3
|
|
4
|
+
def compare_positions(pos_1, pos_2, eps: 1e-6)
|
5
|
+
pos_1.zip(pos_2).all?{|el_1, el_2|
|
6
|
+
(el_1 - el_2).abs <= eps
|
7
|
+
}
|
8
|
+
end
|
9
|
+
|
10
|
+
def compare_matrices(matrix_1, matrix_2, eps: 1e-6)
|
11
|
+
matrix_1.length == matrix_2.length && \
|
12
|
+
matrix_1.zip(matrix_2).all?{|pos_1, pos_2|
|
13
|
+
compare_positions(pos_1, pos_2, eps: eps)
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def compare_models_in_files(file_1, file_2, klass: Bioinform::MotifModel::PM, eps: 1e-6)
|
18
|
+
pm_1 = klass.from_file(file_1)
|
19
|
+
pm_2 = klass.from_file(file_2)
|
20
|
+
pm_1.name == pm_2.name && compare_matrices(pm_1.matrix, pm_2.matrix)
|
21
|
+
end
|
22
|
+
|
4
23
|
describe Bioinform::CLI do
|
5
24
|
describe '.change_folder_and_extension' do
|
6
25
|
it 'should change extension and folder' do
|
data/spec/cli/pcm2pwm_spec.rb
CHANGED
@@ -24,56 +24,56 @@ describe Bioinform::CLI::PCM2PWM do
|
|
24
24
|
it 'should transform single PCM to PWM' do
|
25
25
|
run_pcm2pwm('KLF4_f2.pcm')
|
26
26
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
27
|
-
expect(
|
27
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'should transform multiple PCMs to PWMs' do
|
31
31
|
run_pcm2pwm('KLF4_f2.pcm SP1_f1.pcm')
|
32
32
|
|
33
33
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
34
|
-
expect(
|
34
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
35
35
|
|
36
36
|
expect(File.exist?('SP1_f1.pwm')).to be_truthy
|
37
|
-
expect(
|
37
|
+
expect(compare_models_in_files('SP1_f1.pwm', 'SP1_f1.pwm.result')).to be_truthy
|
38
38
|
end
|
39
39
|
|
40
40
|
it 'should transform extension to specified with --extension option' do
|
41
41
|
run_pcm2pwm('KLF4_f2.pcm --extension=pat')
|
42
42
|
expect(File.exist?('KLF4_f2.pat')).to be_truthy
|
43
|
-
expect(
|
43
|
+
expect(compare_models_in_files('KLF4_f2.pat', 'KLF4_f2.pwm.result')).to be_truthy
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should save PWMs into folder specified with --folder option when folder exists' do
|
47
47
|
Dir.mkdir('pwm_folder') unless Dir.exist?('pwm_folder')
|
48
48
|
run_pcm2pwm('KLF4_f2.pcm --folder=pwm_folder')
|
49
49
|
expect(File.exist?('pwm_folder/KLF4_f2.pwm')).to be_truthy
|
50
|
-
expect(
|
50
|
+
expect(compare_models_in_files('pwm_folder/KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
51
51
|
end
|
52
52
|
it 'should save PWMs into folder specified with --folder option' do
|
53
53
|
FileUtils.rm_rf('pwm_folder') if Dir.exist?('pwm_folder')
|
54
54
|
run_pcm2pwm('KLF4_f2.pcm --folder=pwm_folder')
|
55
55
|
expect(File.exist?('pwm_folder/KLF4_f2.pwm')).to be_truthy
|
56
|
-
expect(
|
56
|
+
expect(compare_models_in_files('pwm_folder/KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'should process PCMs with names obtained from STDIN' do
|
60
60
|
provide_stdin('KLF4_f2.pcm SP1_f1.pcm') { run_pcm2pwm('') }
|
61
61
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
62
|
-
expect(
|
62
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
63
63
|
|
64
64
|
expect(File.exist?('SP1_f1.pwm')).to be_truthy
|
65
|
-
expect(
|
65
|
+
expect(compare_models_in_files('SP1_f1.pwm', 'SP1_f1.pwm.result')).to be_truthy
|
66
66
|
end
|
67
67
|
|
68
68
|
it 'should process PCMs with names obtained from STDIN when there are some options' do
|
69
69
|
provide_stdin('KLF4_f2.pcm') { run_pcm2pwm('-e pat') }
|
70
70
|
expect(File.exist?('KLF4_f2.pat')).to be_truthy
|
71
|
-
expect(
|
71
|
+
expect(compare_models_in_files('KLF4_f2.pat', 'KLF4_f2.pwm.result')).to be_truthy
|
72
72
|
end
|
73
73
|
|
74
74
|
it 'should process PCMs having filename with spaces' do
|
75
75
|
run_pcm2pwm('"KLF4 f2 spaced name.pcm"')
|
76
76
|
expect(File.exist?('KLF4 f2 spaced name.pwm')).to be_truthy
|
77
|
-
expect(
|
77
|
+
expect(compare_models_in_files('KLF4 f2 spaced name.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
78
78
|
end
|
79
79
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PPMConverter do
|
|
4
4
|
let(:pcm) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]]) }
|
5
5
|
let(:pwm) { Bioinform::MotifModel::PWM.new([[1,2,3,4],[2,2,2,4]]) }
|
6
6
|
let(:ppm) { Bioinform::MotifModel::PPM.new([[0.1,0.2,0.3,0.4],[0.2,0.2,0.2,0.4]]) }
|
7
|
-
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]]) }
|
7
|
+
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]], validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }
|
8
8
|
|
9
9
|
let(:named_pcm) { Bioinform::MotifModel::NamedModel.new(pcm, 'motif name') }
|
10
10
|
let(:named_pwm) { Bioinform::MotifModel::NamedModel.new(pwm, 'motif name') }
|
@@ -26,7 +26,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PPMConverter do
|
|
26
26
|
specify { expect(converter.convert(named_pcm)).to be_kind_of Bioinform::MotifModel::NamedModel }
|
27
27
|
specify { expect(converter.convert(named_pcm).model).to be_kind_of Bioinform::MotifModel::PPM }
|
28
28
|
specify { expect(converter.convert(named_pcm).name).to eq 'motif name' }
|
29
|
-
specify { expect{ converter.convert(pwm) }.to raise_error
|
30
|
-
specify { expect{ converter.convert(named_pwm) }.to raise_error
|
31
|
-
specify { expect{ converter.convert(ppm) }.to raise_error
|
29
|
+
specify { expect{ converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
30
|
+
specify { expect{ converter.convert(named_pwm) }.to raise_error(Bioinform::Error) }
|
31
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
32
32
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
4
4
|
let(:pcm) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]]) }
|
5
5
|
let(:pwm) { Bioinform::MotifModel::PWM.new([[1,2,3,4],[2,2,2,4]]) }
|
6
6
|
let(:ppm) { Bioinform::MotifModel::PPM.new([[0.1,0.2,0.3,0.4],[0.2,0.2,0.2,0.4]]) }
|
7
|
-
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]]) }
|
7
|
+
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]], validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }
|
8
8
|
|
9
9
|
let(:named_pcm) { Bioinform::MotifModel::NamedModel.new(pcm, 'motif name') }
|
10
10
|
let(:named_pwm) { Bioinform::MotifModel::NamedModel.new(pwm, 'motif name') }
|
@@ -16,7 +16,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
16
16
|
specify { expect(converter.background).to eq Bioinform::Background::Uniform }
|
17
17
|
|
18
18
|
specify { expect(converter.convert(pcm)).to be_kind_of Bioinform::MotifModel::PWM }
|
19
|
-
specify { expect(converter.calculate_pseudocount(pcm)).to eq Math.log(10) }
|
19
|
+
specify { expect(converter.calculate_pseudocount(pcm.matrix[0])).to eq Math.log(10) }
|
20
20
|
|
21
21
|
specify do
|
22
22
|
cnt = 10
|
@@ -27,14 +27,24 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
27
27
|
[Math.log((2+k*0.25)/den), Math.log((2+k*0.25)/den), Math.log((2+k*0.25)/den), Math.log((4+k*0.25)/den)] ]
|
28
28
|
end
|
29
29
|
|
30
|
-
specify { expect{ converter.convert(pcm_different_counts) }.
|
30
|
+
specify { expect{ converter.convert(pcm_different_counts) }.not_to raise_error }
|
31
|
+
specify {
|
32
|
+
counts = [10, 10, 13]
|
33
|
+
pseudocounts = counts.map{|el| Math.log(el) }
|
34
|
+
denominators = counts.zip(pseudocounts).map{|count, pseudocount| 0.25 * (count + pseudocount) }
|
35
|
+
expect(converter.convert(pcm_different_counts).matrix).to eq [
|
36
|
+
[1,2,3,4].map{|el| Math.log((el + pseudocounts[0]*0.25) / denominators[0]) },
|
37
|
+
[2,2,2,4].map{|el| Math.log((el + pseudocounts[1]*0.25) / denominators[1]) },
|
38
|
+
[3,3,3,4].map{|el| Math.log((el + pseudocounts[2]*0.25) / denominators[2]) },
|
39
|
+
]
|
40
|
+
}
|
31
41
|
|
32
42
|
specify { expect(converter.convert(named_pcm)).to be_kind_of Bioinform::MotifModel::NamedModel }
|
33
43
|
specify { expect(converter.convert(named_pcm).model).to be_kind_of Bioinform::MotifModel::PWM }
|
34
44
|
specify { expect(converter.convert(named_pcm).name).to eq 'motif name' }
|
35
|
-
specify { expect{ converter.convert(pwm) }.to raise_error
|
36
|
-
specify { expect{ converter.convert(named_pwm) }.to raise_error
|
37
|
-
specify { expect{ converter.convert(ppm) }.to raise_error
|
45
|
+
specify { expect{ converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
46
|
+
specify { expect{ converter.convert(named_pwm) }.to raise_error(Bioinform::Error) }
|
47
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
38
48
|
end
|
39
49
|
|
40
50
|
context 'with specified explicitly pseudocount' do
|
@@ -8,7 +8,7 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
8
8
|
specify{ expect(converter.iupac_alphabet).to eq Bioinform::NucleotideAlphabetWithN }
|
9
9
|
specify 'can convert only PWMs' do
|
10
10
|
pcm = Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]])
|
11
|
-
expect { converter.convert(pcm) }.to raise_error
|
11
|
+
expect { converter.convert(pcm) }.to raise_error(Bioinform::Error)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -28,12 +28,12 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
28
28
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:C,:G,:T,:N], [:T,:G,:C,:A,:N])
|
29
29
|
custom_matrix = [[1,2,3,1.567, 0.1],[12,-11,12,0, 0.1],[-1.1, 0.6, 0.4, 0.321, 0.1]]
|
30
30
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(custom_matrix, alphabet: custom_alphabet)
|
31
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
31
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
32
32
|
end
|
33
33
|
specify do
|
34
34
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:X,:Y,:T], [:T,:Y,:X,:A])
|
35
35
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(matrix, alphabet: custom_alphabet)
|
36
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
36
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
@@ -54,12 +54,12 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
54
54
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:C,:G,:T,:N], [:T,:G,:C,:A,:N])
|
55
55
|
custom_matrix = [[1,2,3,1.567, 0.1],[12,-11,12,0, 0.1],[-1.1, 0.6, 0.4, 0.321, 0.1]]
|
56
56
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(custom_matrix, alphabet: custom_alphabet)
|
57
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
57
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
58
58
|
end
|
59
59
|
specify do
|
60
60
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:X,:Y,:T], [:T,:Y,:X,:A])
|
61
61
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(matrix, alphabet: custom_alphabet)
|
62
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
62
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
@@ -50,8 +50,8 @@ describe Bioinform::ConversionAlgorithms::PWM2PCMConverter do
|
|
50
50
|
specify { expect(converter.convert(named_pwm).model).to be_kind_of Bioinform::MotifModel::PCM }
|
51
51
|
specify { expect(converter.convert(named_pwm).name).to eq 'motif name' }
|
52
52
|
|
53
|
-
specify { expect{ converter.convert(pcm) }.to raise_error }
|
54
|
-
specify { expect{ converter.convert(ppm) }.to raise_error }
|
55
|
-
specify { expect{ converter.convert(named_pcm) }.to raise_error }
|
53
|
+
specify { expect{ converter.convert(pcm) }.to raise_error(Bioinform::Error) }
|
54
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
55
|
+
specify { expect{ converter.convert(named_pcm) }.to raise_error(Bioinform::Error) }
|
56
56
|
end
|
57
57
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::MotifModel::PCM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'fails on matrix having negative elements' do
|
7
|
-
expect { Bioinform::MotifModel::PCM.new([[1,2,1,3],[3,3,0,1], [-2, 3, 3, 3]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PCM.new([[1,2,1,3],[3,3,0,1], [-2, 3, 3, 3]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
context 'with valid matrix' do
|
@@ -21,10 +21,13 @@ describe Bioinform::MotifModel::PCM do
|
|
21
21
|
context 'with different counts in different positions' do
|
22
22
|
let(:matrix) { [[1,2,1,3],[30,10,100,11000], [1, 0, 3, 3]] }
|
23
23
|
specify do
|
24
|
-
expect { Bioinform::MotifModel::PCM.new(matrix) }.
|
24
|
+
expect { Bioinform::MotifModel::PCM.new(matrix) }.to raise_error(Bioinform::Error)
|
25
25
|
end
|
26
26
|
specify do
|
27
|
-
expect
|
27
|
+
expect { Bioinform::MotifModel::PCM.new(matrix, validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }.not_to raise_error
|
28
|
+
end
|
29
|
+
specify do
|
30
|
+
expect( Bioinform::MotifModel::PCM.new(matrix, validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR).matrix ).to eq matrix
|
28
31
|
end
|
29
32
|
end
|
30
33
|
end
|
@@ -45,7 +48,7 @@ describe Bioinform::MotifModel::PCM do
|
|
45
48
|
|
46
49
|
context 'with different counts in different positions' do
|
47
50
|
let(:matrix) { [[1,2,1,3],[30,10,100,11000], [1, 0, 3, 3]] }
|
48
|
-
specify{ expect{ pcm.count }.to raise_error }
|
51
|
+
specify{ expect{ pcm.count }.to raise_error(Bioinform::Error) }
|
49
52
|
end
|
50
53
|
end
|
51
54
|
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -4,19 +4,19 @@ describe Bioinform::MotifModel::PM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'with matrix having more than 4 elements in a position' do
|
7
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
specify 'with matrix having less than 4 elements in a position' do
|
11
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error
|
11
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error(Bioinform::Error)
|
12
12
|
end
|
13
13
|
|
14
14
|
specify 'with matrix having positions in rows, nucleotides in columns' do
|
15
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error
|
15
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error(Bioinform::Error)
|
16
16
|
end
|
17
17
|
|
18
18
|
specify 'with empty matrix' do
|
19
|
-
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error
|
19
|
+
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error(Bioinform::Error)
|
20
20
|
end
|
21
21
|
|
22
22
|
context 'with valid matrix' do
|
@@ -105,7 +105,7 @@ describe Bioinform::MotifModel::PM do
|
|
105
105
|
let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
|
106
106
|
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
107
107
|
specify do
|
108
|
-
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error
|
108
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error(Bioinform::Error)
|
109
109
|
end
|
110
110
|
specify do
|
111
111
|
expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
|
@@ -4,12 +4,12 @@ describe Bioinform::MotifModel::PPM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'fails on matrix having negative elements' do
|
7
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, -0.1, -0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, -0.1, -0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
specify 'fails on matrix having sum of position elements different from 1' do
|
10
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, 0.1, 0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
11
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.3, 0.1, 0.1, 0.3],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
12
|
-
expect { Bioinform::MotifModel::PPM.new([[0.3, 0.1, 0.1, 0.3],[0.3, 0.1, 0.1, 0.3],[0.2, 0.2, 0.2, 0.2]]) }.to raise_error
|
10
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, 0.1, 0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
11
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.3, 0.1, 0.1, 0.3],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
12
|
+
expect { Bioinform::MotifModel::PPM.new([[0.3, 0.1, 0.1, 0.3],[0.3, 0.1, 0.1, 0.3],[0.2, 0.2, 0.2, 0.2]]) }.to raise_error(Bioinform::Error)
|
13
13
|
end
|
14
14
|
|
15
15
|
context 'with valid matrix' do
|
@@ -57,12 +57,12 @@ describe Bioinform::MotifModel::PWM do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
describe '#left_augmented' do
|
60
|
-
specify { expect{pwm.left_augmented(-1)}.to raise_error }
|
60
|
+
specify { expect{pwm.left_augmented(-1)}.to raise_error(Bioinform::Error) }
|
61
61
|
specify { expect(pwm.left_augmented(0)).to eq pwm }
|
62
62
|
specify { expect(pwm.left_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[0,0,0,0],[0,0,0,0],[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
|
63
63
|
end
|
64
64
|
describe '#right_augmented' do
|
65
|
-
specify { expect{pwm.right_augmented(-1)}.to raise_error }
|
65
|
+
specify { expect{pwm.right_augmented(-1)}.to raise_error(Bioinform::Error) }
|
66
66
|
specify { expect(pwm.right_augmented(0)).to eq pwm }
|
67
67
|
specify { expect(pwm.right_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[0,0,0,0],[0,0,0,0]]) }
|
68
68
|
end
|
@@ -93,7 +93,8 @@ describe Bioinform::MotifModel::PWM do
|
|
93
93
|
[-1, 1, 1, 1, 12,22,32,42,52,62, 702,802,902,1002, 10002 ]] }
|
94
94
|
specify { expect(pwm.discreted(1).alphabet).to eq Bioinform::IUPACAlphabet}
|
95
95
|
|
96
|
-
|
96
|
+
let(:iupac_converter) { Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter.new }
|
97
|
+
specify { expect{ iupac_converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
97
98
|
end
|
98
99
|
|
99
100
|
describe '.from_string' do
|
@@ -4,7 +4,7 @@ require 'bioinform/formatters/consensus_formatter'
|
|
4
4
|
describe Bioinform::ConsensusFormatter do
|
5
5
|
let(:pm) { Bioinform::MotifModel::PM.new([[10,30,10,28], [30,16,16,16], [12,30,10,26], [26,27,27,1]]) }
|
6
6
|
|
7
|
-
specify('.new without a block raises error') { expect{ Bioinform::ConsensusFormatter.new }.to raise_error }
|
7
|
+
specify('.new without a block raises error') { expect{ Bioinform::ConsensusFormatter.new }.to raise_error(Bioinform::Error) }
|
8
8
|
|
9
9
|
context 'custom formatter' do
|
10
10
|
let(:formatter){ Bioinform::ConsensusFormatter.new{|pos, el, ind| (pos.max - el) < pos.max * 0.1 } }
|
@@ -24,7 +24,7 @@ describe Bioinform::MotifFormatter do
|
|
24
24
|
end
|
25
25
|
context 'with with_name equal to true' do
|
26
26
|
let(:formatter) { Bioinform::MotifFormatter.new(with_name: true) }
|
27
|
-
specify { expect{ formatter.format(motif) }.to raise_error }
|
27
|
+
specify { expect{ formatter.format(motif) }.to raise_error(Bioinform::Error) }
|
28
28
|
specify { expect( formatter.format(motif.named('')) ).to eq ">\n" +
|
29
29
|
default_matrix_string }
|
30
30
|
specify { expect( formatter.format(motif.named('Stub name')) ).to eq ">Stub name\n" +
|
@@ -38,7 +38,7 @@ describe Bioinform::MotifFormatter do
|
|
38
38
|
default_matrix_string }
|
39
39
|
end
|
40
40
|
context 'with with_name value different from true/false/:auto' do
|
41
|
-
specify{ expect { Bioinform::MotifFormatter.new(with_name: :somewhat) }.to raise_error }
|
41
|
+
specify{ expect { Bioinform::MotifFormatter.new(with_name: :somewhat) }.to raise_error(Bioinform::Error) }
|
42
42
|
end
|
43
43
|
|
44
44
|
context 'with nucleotides_in :columns' do
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'bioinform/parsers/matrix_parser'
|
2
2
|
|
3
3
|
describe Bioinform::MatrixParser do
|
4
|
-
specify { expect{ Bioinform::MatrixParser.new(nucleotides_in: :somewhat) }.to raise_error }
|
4
|
+
specify { expect{ Bioinform::MatrixParser.new(nucleotides_in: :somewhat) }.to raise_error(Bioinform::Error) }
|
5
5
|
|
6
6
|
context 'with default options' do
|
7
7
|
subject(:parser) { Bioinform::MatrixParser.new }
|
@@ -40,9 +40,9 @@ describe Bioinform::MatrixParser do
|
|
40
40
|
let(:input_not_allowed_2) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
41
41
|
let(:input_not_allowed_3) {"##01\t1\t2\t3\t4\n" + "##02\t11\t12\t13\t14" }
|
42
42
|
specify { expect( parser.parse!(input_allowed) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
43
|
-
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error }
|
44
|
-
specify { expect{ parser.parse!(input_not_allowed_2) }.to raise_error }
|
45
|
-
specify { expect{ parser.parse!(input_not_allowed_3) }.to raise_error }
|
43
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error(Bioinform::Error) }
|
44
|
+
specify { expect{ parser.parse!(input_not_allowed_2) }.to raise_error(Bioinform::Error) }
|
45
|
+
specify { expect{ parser.parse!(input_not_allowed_3) }.to raise_error(Bioinform::Error) }
|
46
46
|
end
|
47
47
|
context 'with has_name equal to :auto parser can either have name or not' do
|
48
48
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: :auto) }
|
@@ -51,7 +51,7 @@ describe Bioinform::MatrixParser do
|
|
51
51
|
let(:input_with_bad_name) {"-Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
52
52
|
specify { expect( parser.parse!(input_without_name) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
53
53
|
specify { expect( parser.parse!(input_with_name) ).to eq( {name: 'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
54
|
-
specify { expect{ parser.parse!(input_with_bad_name) }.to raise_error }
|
54
|
+
specify { expect{ parser.parse!(input_with_bad_name) }.to raise_error(Bioinform::Error) }
|
55
55
|
end
|
56
56
|
context 'parser having name and header row' do
|
57
57
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: true, has_header_row: true) }
|
@@ -62,7 +62,7 @@ describe Bioinform::MatrixParser do
|
|
62
62
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: false, has_header_row: true) }
|
63
63
|
let(:input) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
64
64
|
specify { expect( parser.parse!(input) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
65
|
-
specify { expect{ parser.parse!("Motif name\n" + input) }.to raise_error }
|
65
|
+
specify { expect{ parser.parse!("Motif name\n" + input) }.to raise_error(Bioinform::Error) }
|
66
66
|
end
|
67
67
|
context 'parser having header column' do
|
68
68
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_header_column: true) }
|
@@ -101,7 +101,7 @@ describe Bioinform::MatrixParser do
|
|
101
101
|
let(:input_allowed) {"NA>Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
102
102
|
let(:input_not_allowed) {"Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
103
103
|
specify { expect( parser.parse!(input_allowed) ).to eq( {name: "Motif name\tother info", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
104
|
-
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error }
|
104
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error(Bioinform::Error) }
|
105
105
|
end
|
106
106
|
|
107
107
|
context 'parser reducing number of nucleotides' do
|
@@ -127,7 +127,7 @@ describe Bioinform::MatrixParser do
|
|
127
127
|
context 'parser which hasn\'t enough number of nucleotides' do
|
128
128
|
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, fix_nucleotides_number: 4) }
|
129
129
|
let(:input) {">PM name\n" + "1\t2\t3\n" + "11\t12\t13" }
|
130
|
-
specify { expect{ parser.parse!(input) }.to raise_error }
|
130
|
+
specify { expect{ parser.parse!(input) }.to raise_error(Bioinform::Error) }
|
131
131
|
end
|
132
132
|
|
133
133
|
context 'parser with auto transposition' do
|
data/spec/spec_helper_source.rb
CHANGED
@@ -37,7 +37,7 @@ def parser_specs(parser, good_cases, bad_cases)
|
|
37
37
|
|
38
38
|
bad_cases.each do |case_description, input|
|
39
39
|
it "should raise an exception on parsing #{case_description}" do
|
40
|
-
expect{ parser.parse!(input[:input]) }.to raise_error
|
40
|
+
expect{ parser.parse!(input[:input]) }.to raise_error(Bioinform::Error)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A bunch of useful classes for bioinformatics
|
14
14
|
email:
|
@@ -22,6 +22,7 @@ extra_rdoc_files: []
|
|
22
22
|
files:
|
23
23
|
- ".gitignore"
|
24
24
|
- Gemfile
|
25
|
+
- Gemfile.lock
|
25
26
|
- Guardfile
|
26
27
|
- LICENSE
|
27
28
|
- README.md
|
@@ -62,6 +63,7 @@ files:
|
|
62
63
|
- lib/bioinform/parsers/motif_splitter.rb
|
63
64
|
- lib/bioinform/support.rb
|
64
65
|
- lib/bioinform/support/strip_doc.rb
|
66
|
+
- lib/bioinform/validator.rb
|
65
67
|
- lib/bioinform/version.rb
|
66
68
|
- spec/alphabet_spec.rb
|
67
69
|
- spec/background_spec.rb
|
@@ -120,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
122
|
version: '0'
|
121
123
|
requirements: []
|
122
124
|
rubyforge_project:
|
123
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.5.1
|
124
126
|
signing_key:
|
125
127
|
specification_version: 4
|
126
128
|
summary: Classes for work with different input formats of positional matrices and
|