bioinform 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +84 -0
- data/TODO.txt +5 -1
- data/lib/bioinform/background.rb +2 -0
- data/lib/bioinform/cli/convert_motif.rb +3 -4
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +6 -5
- data/lib/bioinform/data_models/pcm.rb +25 -11
- data/lib/bioinform/data_models/pm.rb +34 -31
- data/lib/bioinform/data_models/ppm.rb +22 -5
- data/lib/bioinform/data_models/pwm.rb +12 -4
- data/lib/bioinform/errors.rb +8 -1
- data/lib/bioinform/validator.rb +86 -0
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +5 -5
- data/spec/background_spec.rb +4 -4
- data/spec/cli/cli_spec.rb +19 -0
- data/spec/cli/pcm2pwm_spec.rb +10 -10
- data/spec/converters/pcm2ppm_converter_spec.rb +4 -4
- data/spec/converters/pcm2pwm_converter_spec.rb +16 -6
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +5 -5
- data/spec/converters/pwm2pcm_converter_spec.rb +3 -3
- data/spec/data_models/pcm_spec.rb +7 -4
- data/spec/data_models/pm_spec.rb +5 -5
- data/spec/data_models/ppm_spec.rb +4 -4
- data/spec/data_models/pwm_spec.rb +4 -3
- data/spec/formatters/consensus_formatter_spec.rb +1 -1
- data/spec/formatters/raw_formatter_spec.rb +2 -2
- data/spec/parsers/matrix_parser_spec.rb +8 -8
- data/spec/spec_helper_source.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64c5e6fe652d85bd538d08020e5d06779e676fa7
|
4
|
+
data.tar.gz: 954a02afb784d086d006e59e97f57400a10f7858
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3e549ad3b7b20a45ba37dc90c25660af7f1a455cf358aced90991570b76070da5688872141a32e54590a6cd0e65622fe2010b9b27b34466886b15f5a6b7856e
|
7
|
+
data.tar.gz: 81e3dba3f91e3f4acc6838efe5e197da2712f070a4962e3d29bd0df81a0834eb513542ecfcbaca62802cd32fdaa90e883c356d1a008abde9715e5183e52501ee
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bioinform (0.3.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
coderay (1.1.0)
|
10
|
+
diff-lcs (1.2.5)
|
11
|
+
fakefs (0.4.3)
|
12
|
+
ffi (1.9.10)
|
13
|
+
formatador (0.2.5)
|
14
|
+
given_core (3.7.1)
|
15
|
+
sorcerer (>= 0.3.7)
|
16
|
+
guard (2.13.0)
|
17
|
+
formatador (>= 0.2.4)
|
18
|
+
listen (>= 2.7, <= 4.0)
|
19
|
+
lumberjack (~> 1.0)
|
20
|
+
nenv (~> 0.1)
|
21
|
+
notiffany (~> 0.0)
|
22
|
+
pry (>= 0.9.12)
|
23
|
+
shellany (~> 0.0)
|
24
|
+
thor (>= 0.18.1)
|
25
|
+
guard-compat (1.2.1)
|
26
|
+
guard-rspec (4.6.4)
|
27
|
+
guard (~> 2.1)
|
28
|
+
guard-compat (~> 1.1)
|
29
|
+
rspec (>= 2.99.0, < 4.0)
|
30
|
+
listen (3.0.5)
|
31
|
+
rb-fsevent (>= 0.9.3)
|
32
|
+
rb-inotify (>= 0.9)
|
33
|
+
lumberjack (1.0.9)
|
34
|
+
method_source (0.8.2)
|
35
|
+
nenv (0.2.0)
|
36
|
+
notiffany (0.0.8)
|
37
|
+
nenv (~> 0.1)
|
38
|
+
shellany (~> 0.0)
|
39
|
+
pry (0.10.3)
|
40
|
+
coderay (~> 1.1.0)
|
41
|
+
method_source (~> 0.8.1)
|
42
|
+
slop (~> 3.4)
|
43
|
+
rake (10.4.2)
|
44
|
+
rb-fsevent (0.9.6)
|
45
|
+
rb-inotify (0.9.5)
|
46
|
+
ffi (>= 0.5.0)
|
47
|
+
rspec (3.4.0)
|
48
|
+
rspec-core (~> 3.4.0)
|
49
|
+
rspec-expectations (~> 3.4.0)
|
50
|
+
rspec-mocks (~> 3.4.0)
|
51
|
+
rspec-core (3.4.1)
|
52
|
+
rspec-support (~> 3.4.0)
|
53
|
+
rspec-expectations (3.4.0)
|
54
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
55
|
+
rspec-support (~> 3.4.0)
|
56
|
+
rspec-given (3.7.1)
|
57
|
+
given_core (= 3.7.1)
|
58
|
+
rspec (>= 2.14.0)
|
59
|
+
rspec-mocks (3.4.0)
|
60
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
61
|
+
rspec-support (~> 3.4.0)
|
62
|
+
rspec-support (3.4.1)
|
63
|
+
shellany (0.0.1)
|
64
|
+
slop (3.6.0)
|
65
|
+
sorcerer (1.0.2)
|
66
|
+
spork (0.9.2)
|
67
|
+
thor (0.19.1)
|
68
|
+
wdm (0.1.1)
|
69
|
+
|
70
|
+
PLATFORMS
|
71
|
+
ruby
|
72
|
+
|
73
|
+
DEPENDENCIES
|
74
|
+
bioinform!
|
75
|
+
fakefs (~> 0.4.2)
|
76
|
+
guard-rspec (>= 2.1.0)
|
77
|
+
rake (~> 10.4)
|
78
|
+
rspec (~> 3.0)
|
79
|
+
rspec-given (>= 2.0.0)
|
80
|
+
spork (>= 0.9.2)
|
81
|
+
wdm
|
82
|
+
|
83
|
+
BUNDLED WITH
|
84
|
+
1.11.2
|
data/TODO.txt
CHANGED
@@ -32,4 +32,8 @@ Decide:
|
|
32
32
|
Specs
|
33
33
|
-- PWM#probabilities, #score_variance, #gauss_estimation
|
34
34
|
|
35
|
-
background#to_s and WordwiseBackground#to_s
|
35
|
+
background#to_s and WordwiseBackground#to_s
|
36
|
+
|
37
|
+
SNP should be indexed both by sequence start and relative to snp position
|
38
|
+
|
39
|
+
Интеграция PCM/PWM с iRuby: вывод лого в форме svg/png, печать матриц в tex-формате
|
data/lib/bioinform/background.rb
CHANGED
@@ -50,6 +50,8 @@ module Bioinform
|
|
50
50
|
include Bioinform::Background
|
51
51
|
def initialize(frequencies)
|
52
52
|
@frequencies = frequencies
|
53
|
+
raise Error, 'Frequencies should have 4 components' unless frequencies.length == 4
|
54
|
+
raise Error, 'Frequencies should be in [0;1]' unless frequencies.all?{|el| (0..1).include?(el) }
|
53
55
|
raise Error, 'Sum of Background frequencies should be equal to 1' unless (frequencies.inject(0.0, &:+) - 1.0).abs < 1e-4
|
54
56
|
end
|
55
57
|
|
@@ -27,14 +27,13 @@ module Bioinform
|
|
27
27
|
output_motifs = []
|
28
28
|
motifs = motif_files.map do |filename|
|
29
29
|
input = File.read(filename)
|
30
|
-
motif_info = MotifParser.new.parse(input)
|
31
30
|
case options[:model_from]
|
32
31
|
when 'pwm'
|
33
|
-
MotifModel::PWM.
|
32
|
+
MotifModel::PWM.from_string(input)
|
34
33
|
when 'pcm'
|
35
|
-
MotifModel::PCM.
|
34
|
+
MotifModel::PCM.from_string(input)
|
36
35
|
when 'ppm'
|
37
|
-
MotifModel::PPM.
|
36
|
+
MotifModel::PPM.from_string(input)
|
38
37
|
else
|
39
38
|
raise "Unknown value of model-from parameter: `#{options[:model_from]}`"
|
40
39
|
end
|
@@ -12,16 +12,17 @@ module Bioinform
|
|
12
12
|
@pseudocount = options.fetch(:pseudocount, :log)
|
13
13
|
end
|
14
14
|
|
15
|
-
def calculate_pseudocount(
|
15
|
+
def calculate_pseudocount(pos)
|
16
16
|
case @pseudocount
|
17
17
|
when Numeric
|
18
18
|
@pseudocount
|
19
19
|
when :log
|
20
|
-
|
20
|
+
count = pos.inject(0.0, &:+)
|
21
|
+
Math.log([count, 2].max)
|
21
22
|
when :sqrt
|
22
|
-
Math.sqrt(pcm.
|
23
|
+
Math.sqrt(pcm.inject(0.0, &:+))
|
23
24
|
when Proc
|
24
|
-
@pseudocount.call(
|
25
|
+
@pseudocount.call(pos)
|
25
26
|
else
|
26
27
|
raise Error, 'Unknown pseudocount type use numeric or :log or :sqrt or Proc with taking pcm parameter'
|
27
28
|
end
|
@@ -29,8 +30,8 @@ module Bioinform
|
|
29
30
|
|
30
31
|
def convert(pcm)
|
31
32
|
raise Error, "#{self.class}#convert accepts only models acting as PCM" unless MotifModel.acts_as_pcm?(pcm)
|
32
|
-
actual_pseudocount = calculate_pseudocount(pcm)
|
33
33
|
matrix = pcm.each_position.map do |pos|
|
34
|
+
actual_pseudocount = calculate_pseudocount(pos)
|
34
35
|
count = pos.inject(0.0, &:+)
|
35
36
|
pos.each_index.map do |index|
|
36
37
|
Math.log((pos[index] + @background.frequencies[index] * actual_pseudocount).to_f / (@background.frequencies[index]*(count + actual_pseudocount)) )
|
@@ -7,20 +7,34 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PCM < PM
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def self.count_validator(eps: 1.0e-4)
|
11
|
+
Validator.new{|matrix, alphabet|
|
12
|
+
errors = []
|
13
|
+
unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
14
|
+
errors << "Elements of PCM should be non-negative."
|
15
|
+
end
|
16
|
+
|
17
|
+
warnings = []
|
18
|
+
if eps
|
19
|
+
counts = matrix.map{|pos| pos.inject(0.0, &:+) }
|
20
|
+
unless (counts.max - counts.min) <= eps * counts.min
|
21
|
+
warnings << "PCM counts are different (discrepancy is greater than eps * MinCount; eps=#{eps}; MinCountn=#{counts.min})."
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
ValidationResult.new(errors: errors, warnings: warnings)
|
26
|
+
}
|
14
27
|
end
|
15
28
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
count
|
29
|
+
VALIDATOR = PM::VALIDATOR * PCM.count_validator(eps: 1.0e-4).make_strict
|
30
|
+
DIFFERENT_COUNTS_VALIDATOR = PM::VALIDATOR * PCM.count_validator(eps: nil).make_strict
|
31
|
+
|
32
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PCM::VALIDATOR)
|
33
|
+
super
|
34
|
+
# validator already checked count discrepancy. We store median count.
|
35
|
+
@count = matrix.map{|pos| pos.inject(0.0, &:+) }.sort[matrix.length / 2]
|
23
36
|
end
|
37
|
+
attr_reader :count
|
24
38
|
end
|
25
39
|
end
|
26
40
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../formatters/motif_formatter'
|
2
|
+
require_relative '../validator'
|
2
3
|
require_relative '../errors'
|
3
4
|
require_relative '../alphabet'
|
4
5
|
require_relative 'named_model'
|
@@ -6,47 +7,39 @@ require_relative 'named_model'
|
|
6
7
|
module Bioinform
|
7
8
|
module MotifModel
|
8
9
|
class PM
|
10
|
+
DEFAULT_PARSER = MatrixParser.new
|
11
|
+
TRIVIAL_VALIDATOR = Validator.new{|matrix, alphabet| ValidationResult.all_ok }
|
12
|
+
VALIDATOR = Validator.new{|matrix, alphabet|
|
13
|
+
errors = []
|
14
|
+
errors << "Matrix should be an Array." unless matrix.is_a? Array
|
15
|
+
errors << "Matrix shouldn't be empty." unless matrix.size > 0
|
16
|
+
errors << "Each matrix position should be an Array." unless matrix.all?{|pos| pos.is_a?(Array) }
|
17
|
+
errors << "Each matrix position should be of size compatible with alphabet (=#{alphabet.size})." unless matrix.all?{|pos| pos.size == alphabet.size }
|
18
|
+
errors << "Each matrix element should be Numeric." unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
19
|
+
ValidationResult.new(errors: errors)
|
20
|
+
}
|
21
|
+
|
9
22
|
attr_reader :matrix, :alphabet
|
10
|
-
def initialize(matrix,
|
23
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PM::VALIDATOR)
|
24
|
+
validation_results = validator.validate_params(matrix, alphabet)
|
25
|
+
unless validation_results.valid?
|
26
|
+
raise ValidationError.new('Invalid matrix.', validation_errors: validation_results)
|
27
|
+
end
|
11
28
|
@matrix = matrix
|
12
|
-
@alphabet =
|
13
|
-
raise ValidationError.new('invalid matrix', validation_errors: validation_errors) unless valid?
|
29
|
+
@alphabet = alphabet
|
14
30
|
end
|
15
31
|
|
16
|
-
def self.from_string(input,
|
17
|
-
parser = options.fetch(:parser, MatrixParser.new)
|
18
|
-
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
32
|
+
def self.from_string(input, alphabet: NucleotideAlphabet, parser: DEFAULT_PARSER)
|
19
33
|
info = parser.parse!(input)
|
20
34
|
self.new(info[:matrix], alphabet: alphabet).named( info[:name] )
|
21
35
|
end
|
22
36
|
|
23
|
-
def self.from_file(filename,
|
24
|
-
parser = options.fetch(:parser, MatrixParser.new)
|
25
|
-
alphabet = options.fetch(:alphabet, NucleotideAlphabet)
|
37
|
+
def self.from_file(filename, alphabet: NucleotideAlphabet, parser: DEFAULT_PARSER)
|
26
38
|
info = parser.parse!(File.read(filename))
|
27
39
|
name = (info[:name] && !info[:name].strip.empty?) ? info[:name] : File.basename(filename, File.extname(filename))
|
28
40
|
self.new(info[:matrix], alphabet: alphabet).named( name )
|
29
41
|
end
|
30
42
|
|
31
|
-
def validation_errors
|
32
|
-
errors = []
|
33
|
-
errors << "matrix should be an Array" unless matrix.is_a? Array
|
34
|
-
errors << "matrix shouldn't be empty" unless matrix.size > 0
|
35
|
-
errors << "each matrix position should be an Array" unless matrix.all?{|pos| pos.is_a?(Array) }
|
36
|
-
errors << "each matrix position should be of size compatible with alphabet (=#{alphabet.size})" unless matrix.all?{|pos| pos.size == alphabet.size }
|
37
|
-
errors << "each matrix element should be Numeric" unless matrix.all?{|pos| pos.all?{|el| el.is_a?(Numeric) } }
|
38
|
-
errors
|
39
|
-
end
|
40
|
-
private :validation_errors
|
41
|
-
|
42
|
-
def valid?
|
43
|
-
validation_errors.empty?
|
44
|
-
rescue
|
45
|
-
false
|
46
|
-
end
|
47
|
-
|
48
|
-
private :valid?
|
49
|
-
|
50
43
|
def length
|
51
44
|
matrix.size
|
52
45
|
end
|
@@ -68,15 +61,15 @@ module Bioinform
|
|
68
61
|
end
|
69
62
|
|
70
63
|
def reversed
|
71
|
-
self.class.new(matrix.reverse, alphabet: alphabet)
|
64
|
+
self.class.new(matrix.reverse, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
72
65
|
end
|
73
66
|
|
74
67
|
def complemented
|
75
|
-
self.class.new(complement_matrix, alphabet: alphabet)
|
68
|
+
self.class.new(complement_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
76
69
|
end
|
77
70
|
|
78
71
|
def reverse_complemented
|
79
|
-
self.class.new(complement_matrix.reverse, alphabet: alphabet)
|
72
|
+
self.class.new(complement_matrix.reverse, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
80
73
|
end
|
81
74
|
|
82
75
|
alias_method :revcomp, :reverse_complemented
|
@@ -88,6 +81,16 @@ module Bioinform
|
|
88
81
|
end
|
89
82
|
private :complement_matrix
|
90
83
|
|
84
|
+
def rounded(precision: 0)
|
85
|
+
return self if !precision
|
86
|
+
rounded_matrix = matrix.map{|pos|
|
87
|
+
pos.map{|el|
|
88
|
+
el.round(precision)
|
89
|
+
}
|
90
|
+
}
|
91
|
+
self.class.new(rounded_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
92
|
+
end
|
93
|
+
|
91
94
|
# def consensus
|
92
95
|
# ConsensusFormatter.by_maximal_elements.format_string(self)
|
93
96
|
# end
|
@@ -7,11 +7,28 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PPM < PM
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
def self.probability_validator(eps: 1.0e-4)
|
11
|
+
Validator.new{|matrix, alphabet|
|
12
|
+
errors = []
|
13
|
+
unless matrix.all?{|pos| pos.all?{|el| el >= 0 } }
|
14
|
+
errors << "Elements of PPM should be non-negative."
|
15
|
+
end
|
16
|
+
|
17
|
+
warnings = []
|
18
|
+
probability_sums = matrix.map{|pos| pos.inject(0.0, &:+) }
|
19
|
+
max_discrepancy = probability_sums.map{|sum| (sum - 1.0).abs }.max
|
20
|
+
unless max_discrepancy <= eps
|
21
|
+
warnings << "PPM should sum up to 1, with discrepancy not greater than #{eps}."
|
22
|
+
end
|
23
|
+
|
24
|
+
ValidationResult.new(errors: errors, warnings: warnings)
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
VALIDATOR = PM::VALIDATOR * PPM.probability_validator(eps: 1.0e-4).make_strict
|
29
|
+
|
30
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PPM::VALIDATOR)
|
31
|
+
super # default validator redefined
|
15
32
|
end
|
16
33
|
end
|
17
34
|
end
|
@@ -7,6 +7,11 @@ module Bioinform
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class PWM < PM
|
10
|
+
VALIDATOR = PM::VALIDATOR
|
11
|
+
def initialize(matrix, alphabet: NucleotideAlphabet, validator: PWM::VALIDATOR)
|
12
|
+
super # default validator redefined
|
13
|
+
end
|
14
|
+
|
10
15
|
def score(word)
|
11
16
|
raise Error, 'Word length should be the same as PWM length' unless word.length == length
|
12
17
|
length.times.map do |pos|
|
@@ -14,10 +19,13 @@ module Bioinform
|
|
14
19
|
end.inject(0.0, &:+)
|
15
20
|
end
|
16
21
|
|
17
|
-
def discreted(rate,
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
def discreted(rate, rounding_method: :ceil)
|
23
|
+
discreted_matrix = matrix.map{|position|
|
24
|
+
position.map{|element|
|
25
|
+
(element * rate).send(rounding_method)
|
26
|
+
}
|
27
|
+
}
|
28
|
+
self.class.new(discreted_matrix, alphabet: alphabet, validator: TRIVIAL_VALIDATOR)
|
21
29
|
end
|
22
30
|
|
23
31
|
def zero_column
|
data/lib/bioinform/errors.rb
CHANGED
@@ -11,7 +11,14 @@ module Bioinform
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def to_s
|
14
|
-
|
14
|
+
case @validation_errors
|
15
|
+
when Array
|
16
|
+
"#{super} (#{@validation_errors.join('; ')})"
|
17
|
+
when ValidationResult
|
18
|
+
"#{super}\n#{@validation_errors}"
|
19
|
+
else
|
20
|
+
"#{super} (#{@validation_errors})"
|
21
|
+
end
|
15
22
|
end
|
16
23
|
end
|
17
24
|
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Bioinform
|
2
|
+
class ValidationResult
|
3
|
+
attr_reader :errors, :warnings
|
4
|
+
def initialize(errors: [], warnings: [])
|
5
|
+
@errors = errors.freeze
|
6
|
+
@warnings = warnings.freeze
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.all_ok
|
10
|
+
self.new(errors: [], warnings: [])
|
11
|
+
end
|
12
|
+
|
13
|
+
def valid?
|
14
|
+
errors.empty?
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_s
|
18
|
+
msg = ""
|
19
|
+
|
20
|
+
if errors && !errors.empty?
|
21
|
+
msg += "Errors:\n" + errors.join("\n") + "\n"
|
22
|
+
end
|
23
|
+
|
24
|
+
if warnings && !warnings.empty?
|
25
|
+
msg += "Warnings:\n" + warnings.join("\n")
|
26
|
+
end
|
27
|
+
|
28
|
+
msg.empty? ? "{No errors, no warnings}" : "{#{msg}}"
|
29
|
+
end
|
30
|
+
|
31
|
+
def with_errors(additional_errors)
|
32
|
+
ValidationResult.new(errors: errors + additional_errors, warnings: warnings)
|
33
|
+
end
|
34
|
+
|
35
|
+
def with_warnings(additional_warnings)
|
36
|
+
ValidationResult.new(errors: errors, warnings: warnings + additional_warnings)
|
37
|
+
end
|
38
|
+
|
39
|
+
# errors from both operands
|
40
|
+
def +(other)
|
41
|
+
ValidationResult.new(errors: errors + other.errors, warnings: warnings + other.warnings)
|
42
|
+
end
|
43
|
+
|
44
|
+
def hash
|
45
|
+
[@errors, @warnings].hash
|
46
|
+
end
|
47
|
+
def eql?(other)
|
48
|
+
(other.class == self.class) && (errors == other.errors) && (warnings == other.warnings)
|
49
|
+
end
|
50
|
+
def ==(other)
|
51
|
+
other.is_a?(ValidationResult) && (errors == other.errors) && (warnings == other.warnings)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class Validator
|
56
|
+
def initialize(&block)
|
57
|
+
if block_given?
|
58
|
+
@validation_block = block
|
59
|
+
else
|
60
|
+
@validation_block = ->(*args, &b){ ValidationResult.new }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def validate_params(*args, &block)
|
65
|
+
@validation_block.call(*args, &block)
|
66
|
+
rescue => e
|
67
|
+
msg = "Unexpected error occured during validation: #{e.message}. Backtrace:\n" + e.backtrace.join("\n")
|
68
|
+
ValidationResult.new(errors: [msg])
|
69
|
+
end
|
70
|
+
|
71
|
+
# Validate both
|
72
|
+
def *(other)
|
73
|
+
Validator.new{|*args, &b|
|
74
|
+
validate_params(*args, &b) + other.validate_params(*args, &b)
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
# treat warnings as errors
|
79
|
+
def make_strict
|
80
|
+
Validator.new{|*args, &block|
|
81
|
+
result = self.validate_params(*args, &block)
|
82
|
+
ValidationResult.new(errors: result.errors + result.warnings)
|
83
|
+
}
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
data/lib/bioinform/version.rb
CHANGED
data/spec/alphabet_spec.rb
CHANGED
@@ -2,9 +2,9 @@ require 'bioinform/alphabet'
|
|
2
2
|
|
3
3
|
describe Bioinform::ComplementableAlphabet do
|
4
4
|
specify "should raise if complement's complement is not original letter" do
|
5
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A]) }.to raise_error
|
6
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:B,:C], [:C,:B,:B,:A]) }.to raise_error
|
7
|
-
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A,:C]) }.to raise_error
|
5
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A]) }.to raise_error(Bioinform::Error)
|
6
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:B,:C], [:C,:B,:B,:A]) }.to raise_error(Bioinform::Error)
|
7
|
+
expect{ Bioinform::ComplementableAlphabet.new([:A,:B,:X,:Y], [:X,:Y,:B,:A,:C]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
context 'usage with alphabet non-symbolized, non-upcased' do
|
@@ -48,10 +48,10 @@ describe Bioinform::NucleotideAlphabet do
|
|
48
48
|
specify { expect( Bioinform::NucleotideAlphabet.size ).to eq 4 }
|
49
49
|
specify { expect( Bioinform::NucleotideAlphabet.complement_letter(:A) ).to eq :T }
|
50
50
|
specify { expect( Bioinform::NucleotideAlphabet.complement_letter(:C) ).to eq :G }
|
51
|
-
specify { expect{ Bioinform::NucleotideAlphabet.complement_letter(:N) }.to raise_error
|
51
|
+
specify { expect{ Bioinform::NucleotideAlphabet.complement_letter(:N) }.to raise_error(Bioinform::Error) }
|
52
52
|
|
53
53
|
specify { expect(Bioinform::NucleotideAlphabet.complement_index(0)).to eq 3 }
|
54
|
-
specify { expect{Bioinform::NucleotideAlphabet.complement_index(4)}.to raise_error
|
54
|
+
specify { expect{Bioinform::NucleotideAlphabet.complement_index(4)}.to raise_error(Bioinform::Error)}
|
55
55
|
end
|
56
56
|
|
57
57
|
describe Bioinform::NucleotideAlphabetWithN do
|
data/spec/background_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'bioinform/background'
|
2
2
|
|
3
3
|
describe Bioinform::Frequencies do
|
4
|
-
specify { expect{Bioinform::Frequencies.new([1,1,1,1]) }.to raise_error
|
5
|
-
specify { expect{Bioinform::Frequencies.new([0.3,0.3,0.3,0.3]) }.to raise_error
|
4
|
+
specify { expect{Bioinform::Frequencies.new([1,1,1,1]) }.to raise_error(Bioinform::Error) }
|
5
|
+
specify { expect{Bioinform::Frequencies.new([0.3,0.3,0.3,0.3]) }.to raise_error(Bioinform::Error) }
|
6
6
|
specify { expect{Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }.not_to raise_error }
|
7
7
|
specify { expect{Bioinform::Frequencies.new([0.2,0.3,0.3,0.2]) }.not_to raise_error }
|
8
8
|
|
@@ -52,6 +52,6 @@ describe Bioinform::Background do
|
|
52
52
|
specify { expect(Bioinform::Background.from_string('uniform')).to eq Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }
|
53
53
|
specify { expect(Bioinform::Background.from_string('UNIFORM')).to eq Bioinform::Frequencies.new([0.25,0.25,0.25,0.25]) }
|
54
54
|
specify { expect(Bioinform::Background.from_string('wordwise')).to eq Bioinform::WordwiseBackground.new }
|
55
|
-
specify { expect{Bioinform::Background.from_string('0.25,0.25,0.25')}.to raise_error
|
56
|
-
specify { expect{Bioinform::Background.from_string('unifromm')}.to raise_error
|
55
|
+
specify { expect{Bioinform::Background.from_string('0.25,0.25,0.25')}.to raise_error(Bioinform::Error) }
|
56
|
+
specify { expect{Bioinform::Background.from_string('unifromm')}.to raise_error(Bioinform::Error) }
|
57
57
|
end
|
data/spec/cli/cli_spec.rb
CHANGED
@@ -1,6 +1,25 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
require 'bioinform/cli'
|
3
3
|
|
4
|
+
def compare_positions(pos_1, pos_2, eps: 1e-6)
|
5
|
+
pos_1.zip(pos_2).all?{|el_1, el_2|
|
6
|
+
(el_1 - el_2).abs <= eps
|
7
|
+
}
|
8
|
+
end
|
9
|
+
|
10
|
+
def compare_matrices(matrix_1, matrix_2, eps: 1e-6)
|
11
|
+
matrix_1.length == matrix_2.length && \
|
12
|
+
matrix_1.zip(matrix_2).all?{|pos_1, pos_2|
|
13
|
+
compare_positions(pos_1, pos_2, eps: eps)
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
def compare_models_in_files(file_1, file_2, klass: Bioinform::MotifModel::PM, eps: 1e-6)
|
18
|
+
pm_1 = klass.from_file(file_1)
|
19
|
+
pm_2 = klass.from_file(file_2)
|
20
|
+
pm_1.name == pm_2.name && compare_matrices(pm_1.matrix, pm_2.matrix)
|
21
|
+
end
|
22
|
+
|
4
23
|
describe Bioinform::CLI do
|
5
24
|
describe '.change_folder_and_extension' do
|
6
25
|
it 'should change extension and folder' do
|
data/spec/cli/pcm2pwm_spec.rb
CHANGED
@@ -24,56 +24,56 @@ describe Bioinform::CLI::PCM2PWM do
|
|
24
24
|
it 'should transform single PCM to PWM' do
|
25
25
|
run_pcm2pwm('KLF4_f2.pcm')
|
26
26
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
27
|
-
expect(
|
27
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'should transform multiple PCMs to PWMs' do
|
31
31
|
run_pcm2pwm('KLF4_f2.pcm SP1_f1.pcm')
|
32
32
|
|
33
33
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
34
|
-
expect(
|
34
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
35
35
|
|
36
36
|
expect(File.exist?('SP1_f1.pwm')).to be_truthy
|
37
|
-
expect(
|
37
|
+
expect(compare_models_in_files('SP1_f1.pwm', 'SP1_f1.pwm.result')).to be_truthy
|
38
38
|
end
|
39
39
|
|
40
40
|
it 'should transform extension to specified with --extension option' do
|
41
41
|
run_pcm2pwm('KLF4_f2.pcm --extension=pat')
|
42
42
|
expect(File.exist?('KLF4_f2.pat')).to be_truthy
|
43
|
-
expect(
|
43
|
+
expect(compare_models_in_files('KLF4_f2.pat', 'KLF4_f2.pwm.result')).to be_truthy
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'should save PWMs into folder specified with --folder option when folder exists' do
|
47
47
|
Dir.mkdir('pwm_folder') unless Dir.exist?('pwm_folder')
|
48
48
|
run_pcm2pwm('KLF4_f2.pcm --folder=pwm_folder')
|
49
49
|
expect(File.exist?('pwm_folder/KLF4_f2.pwm')).to be_truthy
|
50
|
-
expect(
|
50
|
+
expect(compare_models_in_files('pwm_folder/KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
51
51
|
end
|
52
52
|
it 'should save PWMs into folder specified with --folder option' do
|
53
53
|
FileUtils.rm_rf('pwm_folder') if Dir.exist?('pwm_folder')
|
54
54
|
run_pcm2pwm('KLF4_f2.pcm --folder=pwm_folder')
|
55
55
|
expect(File.exist?('pwm_folder/KLF4_f2.pwm')).to be_truthy
|
56
|
-
expect(
|
56
|
+
expect(compare_models_in_files('pwm_folder/KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'should process PCMs with names obtained from STDIN' do
|
60
60
|
provide_stdin('KLF4_f2.pcm SP1_f1.pcm') { run_pcm2pwm('') }
|
61
61
|
expect(File.exist?('KLF4_f2.pwm')).to be_truthy
|
62
|
-
expect(
|
62
|
+
expect(compare_models_in_files('KLF4_f2.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
63
63
|
|
64
64
|
expect(File.exist?('SP1_f1.pwm')).to be_truthy
|
65
|
-
expect(
|
65
|
+
expect(compare_models_in_files('SP1_f1.pwm', 'SP1_f1.pwm.result')).to be_truthy
|
66
66
|
end
|
67
67
|
|
68
68
|
it 'should process PCMs with names obtained from STDIN when there are some options' do
|
69
69
|
provide_stdin('KLF4_f2.pcm') { run_pcm2pwm('-e pat') }
|
70
70
|
expect(File.exist?('KLF4_f2.pat')).to be_truthy
|
71
|
-
expect(
|
71
|
+
expect(compare_models_in_files('KLF4_f2.pat', 'KLF4_f2.pwm.result')).to be_truthy
|
72
72
|
end
|
73
73
|
|
74
74
|
it 'should process PCMs having filename with spaces' do
|
75
75
|
run_pcm2pwm('"KLF4 f2 spaced name.pcm"')
|
76
76
|
expect(File.exist?('KLF4 f2 spaced name.pwm')).to be_truthy
|
77
|
-
expect(
|
77
|
+
expect(compare_models_in_files('KLF4 f2 spaced name.pwm', 'KLF4_f2.pwm.result')).to be_truthy
|
78
78
|
end
|
79
79
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PPMConverter do
|
|
4
4
|
let(:pcm) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]]) }
|
5
5
|
let(:pwm) { Bioinform::MotifModel::PWM.new([[1,2,3,4],[2,2,2,4]]) }
|
6
6
|
let(:ppm) { Bioinform::MotifModel::PPM.new([[0.1,0.2,0.3,0.4],[0.2,0.2,0.2,0.4]]) }
|
7
|
-
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]]) }
|
7
|
+
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]], validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }
|
8
8
|
|
9
9
|
let(:named_pcm) { Bioinform::MotifModel::NamedModel.new(pcm, 'motif name') }
|
10
10
|
let(:named_pwm) { Bioinform::MotifModel::NamedModel.new(pwm, 'motif name') }
|
@@ -26,7 +26,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PPMConverter do
|
|
26
26
|
specify { expect(converter.convert(named_pcm)).to be_kind_of Bioinform::MotifModel::NamedModel }
|
27
27
|
specify { expect(converter.convert(named_pcm).model).to be_kind_of Bioinform::MotifModel::PPM }
|
28
28
|
specify { expect(converter.convert(named_pcm).name).to eq 'motif name' }
|
29
|
-
specify { expect{ converter.convert(pwm) }.to raise_error
|
30
|
-
specify { expect{ converter.convert(named_pwm) }.to raise_error
|
31
|
-
specify { expect{ converter.convert(ppm) }.to raise_error
|
29
|
+
specify { expect{ converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
30
|
+
specify { expect{ converter.convert(named_pwm) }.to raise_error(Bioinform::Error) }
|
31
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
32
32
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
4
4
|
let(:pcm) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]]) }
|
5
5
|
let(:pwm) { Bioinform::MotifModel::PWM.new([[1,2,3,4],[2,2,2,4]]) }
|
6
6
|
let(:ppm) { Bioinform::MotifModel::PPM.new([[0.1,0.2,0.3,0.4],[0.2,0.2,0.2,0.4]]) }
|
7
|
-
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]]) }
|
7
|
+
let(:pcm_different_counts) { Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4],[3,3,3,4]], validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }
|
8
8
|
|
9
9
|
let(:named_pcm) { Bioinform::MotifModel::NamedModel.new(pcm, 'motif name') }
|
10
10
|
let(:named_pwm) { Bioinform::MotifModel::NamedModel.new(pwm, 'motif name') }
|
@@ -16,7 +16,7 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
16
16
|
specify { expect(converter.background).to eq Bioinform::Background::Uniform }
|
17
17
|
|
18
18
|
specify { expect(converter.convert(pcm)).to be_kind_of Bioinform::MotifModel::PWM }
|
19
|
-
specify { expect(converter.calculate_pseudocount(pcm)).to eq Math.log(10) }
|
19
|
+
specify { expect(converter.calculate_pseudocount(pcm.matrix[0])).to eq Math.log(10) }
|
20
20
|
|
21
21
|
specify do
|
22
22
|
cnt = 10
|
@@ -27,14 +27,24 @@ describe Bioinform::ConversionAlgorithms::PCM2PWMConverter do
|
|
27
27
|
[Math.log((2+k*0.25)/den), Math.log((2+k*0.25)/den), Math.log((2+k*0.25)/den), Math.log((4+k*0.25)/den)] ]
|
28
28
|
end
|
29
29
|
|
30
|
-
specify { expect{ converter.convert(pcm_different_counts) }.
|
30
|
+
specify { expect{ converter.convert(pcm_different_counts) }.not_to raise_error }
|
31
|
+
specify {
|
32
|
+
counts = [10, 10, 13]
|
33
|
+
pseudocounts = counts.map{|el| Math.log(el) }
|
34
|
+
denominators = counts.zip(pseudocounts).map{|count, pseudocount| 0.25 * (count + pseudocount) }
|
35
|
+
expect(converter.convert(pcm_different_counts).matrix).to eq [
|
36
|
+
[1,2,3,4].map{|el| Math.log((el + pseudocounts[0]*0.25) / denominators[0]) },
|
37
|
+
[2,2,2,4].map{|el| Math.log((el + pseudocounts[1]*0.25) / denominators[1]) },
|
38
|
+
[3,3,3,4].map{|el| Math.log((el + pseudocounts[2]*0.25) / denominators[2]) },
|
39
|
+
]
|
40
|
+
}
|
31
41
|
|
32
42
|
specify { expect(converter.convert(named_pcm)).to be_kind_of Bioinform::MotifModel::NamedModel }
|
33
43
|
specify { expect(converter.convert(named_pcm).model).to be_kind_of Bioinform::MotifModel::PWM }
|
34
44
|
specify { expect(converter.convert(named_pcm).name).to eq 'motif name' }
|
35
|
-
specify { expect{ converter.convert(pwm) }.to raise_error
|
36
|
-
specify { expect{ converter.convert(named_pwm) }.to raise_error
|
37
|
-
specify { expect{ converter.convert(ppm) }.to raise_error
|
45
|
+
specify { expect{ converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
46
|
+
specify { expect{ converter.convert(named_pwm) }.to raise_error(Bioinform::Error) }
|
47
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
38
48
|
end
|
39
49
|
|
40
50
|
context 'with specified explicitly pseudocount' do
|
@@ -8,7 +8,7 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
8
8
|
specify{ expect(converter.iupac_alphabet).to eq Bioinform::NucleotideAlphabetWithN }
|
9
9
|
specify 'can convert only PWMs' do
|
10
10
|
pcm = Bioinform::MotifModel::PCM.new([[1,2,3,4],[2,2,2,4]])
|
11
|
-
expect { converter.convert(pcm) }.to raise_error
|
11
|
+
expect { converter.convert(pcm) }.to raise_error(Bioinform::Error)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -28,12 +28,12 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
28
28
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:C,:G,:T,:N], [:T,:G,:C,:A,:N])
|
29
29
|
custom_matrix = [[1,2,3,1.567, 0.1],[12,-11,12,0, 0.1],[-1.1, 0.6, 0.4, 0.321, 0.1]]
|
30
30
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(custom_matrix, alphabet: custom_alphabet)
|
31
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
31
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
32
32
|
end
|
33
33
|
specify do
|
34
34
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:X,:Y,:T], [:T,:Y,:X,:A])
|
35
35
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(matrix, alphabet: custom_alphabet)
|
36
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
36
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
@@ -54,12 +54,12 @@ describe Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter do
|
|
54
54
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:C,:G,:T,:N], [:T,:G,:C,:A,:N])
|
55
55
|
custom_matrix = [[1,2,3,1.567, 0.1],[12,-11,12,0, 0.1],[-1.1, 0.6, 0.4, 0.321, 0.1]]
|
56
56
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(custom_matrix, alphabet: custom_alphabet)
|
57
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
57
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
58
58
|
end
|
59
59
|
specify do
|
60
60
|
custom_alphabet = Bioinform::ComplementableAlphabet.new([:A,:X,:Y,:T], [:T,:Y,:X,:A])
|
61
61
|
pwm_w_custom_alphabet = Bioinform::MotifModel::PWM.new(matrix, alphabet: custom_alphabet)
|
62
|
-
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error
|
62
|
+
expect { converter.convert(pwm_w_custom_alphabet) }.to raise_error(Bioinform::Error)
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
@@ -50,8 +50,8 @@ describe Bioinform::ConversionAlgorithms::PWM2PCMConverter do
|
|
50
50
|
specify { expect(converter.convert(named_pwm).model).to be_kind_of Bioinform::MotifModel::PCM }
|
51
51
|
specify { expect(converter.convert(named_pwm).name).to eq 'motif name' }
|
52
52
|
|
53
|
-
specify { expect{ converter.convert(pcm) }.to raise_error
|
54
|
-
specify { expect{ converter.convert(ppm) }.to raise_error
|
55
|
-
specify { expect{ converter.convert(named_pcm) }.to raise_error
|
53
|
+
specify { expect{ converter.convert(pcm) }.to raise_error(Bioinform::Error) }
|
54
|
+
specify { expect{ converter.convert(ppm) }.to raise_error(Bioinform::Error) }
|
55
|
+
specify { expect{ converter.convert(named_pcm) }.to raise_error(Bioinform::Error) }
|
56
56
|
end
|
57
57
|
end
|
@@ -4,7 +4,7 @@ describe Bioinform::MotifModel::PCM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'fails on matrix having negative elements' do
|
7
|
-
expect { Bioinform::MotifModel::PCM.new([[1,2,1,3],[3,3,0,1], [-2, 3, 3, 3]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PCM.new([[1,2,1,3],[3,3,0,1], [-2, 3, 3, 3]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
context 'with valid matrix' do
|
@@ -21,10 +21,13 @@ describe Bioinform::MotifModel::PCM do
|
|
21
21
|
context 'with different counts in different positions' do
|
22
22
|
let(:matrix) { [[1,2,1,3],[30,10,100,11000], [1, 0, 3, 3]] }
|
23
23
|
specify do
|
24
|
-
expect { Bioinform::MotifModel::PCM.new(matrix) }.
|
24
|
+
expect { Bioinform::MotifModel::PCM.new(matrix) }.to raise_error(Bioinform::Error)
|
25
25
|
end
|
26
26
|
specify do
|
27
|
-
expect
|
27
|
+
expect { Bioinform::MotifModel::PCM.new(matrix, validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR) }.not_to raise_error
|
28
|
+
end
|
29
|
+
specify do
|
30
|
+
expect( Bioinform::MotifModel::PCM.new(matrix, validator: Bioinform::MotifModel::PCM::DIFFERENT_COUNTS_VALIDATOR).matrix ).to eq matrix
|
28
31
|
end
|
29
32
|
end
|
30
33
|
end
|
@@ -45,7 +48,7 @@ describe Bioinform::MotifModel::PCM do
|
|
45
48
|
|
46
49
|
context 'with different counts in different positions' do
|
47
50
|
let(:matrix) { [[1,2,1,3],[30,10,100,11000], [1, 0, 3, 3]] }
|
48
|
-
specify{ expect{ pcm.count }.to raise_error
|
51
|
+
specify{ expect{ pcm.count }.to raise_error(Bioinform::Error) }
|
49
52
|
end
|
50
53
|
end
|
51
54
|
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -4,19 +4,19 @@ describe Bioinform::MotifModel::PM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'with matrix having more than 4 elements in a position' do
|
7
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
|
10
10
|
specify 'with matrix having less than 4 elements in a position' do
|
11
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error
|
11
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error(Bioinform::Error)
|
12
12
|
end
|
13
13
|
|
14
14
|
specify 'with matrix having positions in rows, nucleotides in columns' do
|
15
|
-
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error
|
15
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error(Bioinform::Error)
|
16
16
|
end
|
17
17
|
|
18
18
|
specify 'with empty matrix' do
|
19
|
-
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error
|
19
|
+
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error(Bioinform::Error)
|
20
20
|
end
|
21
21
|
|
22
22
|
context 'with valid matrix' do
|
@@ -105,7 +105,7 @@ describe Bioinform::MotifModel::PM do
|
|
105
105
|
let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
|
106
106
|
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
107
107
|
specify do
|
108
|
-
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error
|
108
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error(Bioinform::Error)
|
109
109
|
end
|
110
110
|
specify do
|
111
111
|
expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
|
@@ -4,12 +4,12 @@ describe Bioinform::MotifModel::PPM do
|
|
4
4
|
|
5
5
|
describe '.new' do
|
6
6
|
specify 'fails on matrix having negative elements' do
|
7
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, -0.1, -0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
7
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, -0.1, -0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
8
8
|
end
|
9
9
|
specify 'fails on matrix having sum of position elements different from 1' do
|
10
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, 0.1, 0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
11
|
-
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.3, 0.1, 0.1, 0.3],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error
|
12
|
-
expect { Bioinform::MotifModel::PPM.new([[0.3, 0.1, 0.1, 0.3],[0.3, 0.1, 0.1, 0.3],[0.2, 0.2, 0.2, 0.2]]) }.to raise_error
|
10
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, 0.1, 0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
11
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.3, 0.1, 0.1, 0.3],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error(Bioinform::Error)
|
12
|
+
expect { Bioinform::MotifModel::PPM.new([[0.3, 0.1, 0.1, 0.3],[0.3, 0.1, 0.1, 0.3],[0.2, 0.2, 0.2, 0.2]]) }.to raise_error(Bioinform::Error)
|
13
13
|
end
|
14
14
|
|
15
15
|
context 'with valid matrix' do
|
@@ -57,12 +57,12 @@ describe Bioinform::MotifModel::PWM do
|
|
57
57
|
end
|
58
58
|
|
59
59
|
describe '#left_augmented' do
|
60
|
-
specify { expect{pwm.left_augmented(-1)}.to raise_error
|
60
|
+
specify { expect{pwm.left_augmented(-1)}.to raise_error(Bioinform::Error) }
|
61
61
|
specify { expect(pwm.left_augmented(0)).to eq pwm }
|
62
62
|
specify { expect(pwm.left_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[0,0,0,0],[0,0,0,0],[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
|
63
63
|
end
|
64
64
|
describe '#right_augmented' do
|
65
|
-
specify { expect{pwm.right_augmented(-1)}.to raise_error
|
65
|
+
specify { expect{pwm.right_augmented(-1)}.to raise_error(Bioinform::Error) }
|
66
66
|
specify { expect(pwm.right_augmented(0)).to eq pwm }
|
67
67
|
specify { expect(pwm.right_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[0,0,0,0],[0,0,0,0]]) }
|
68
68
|
end
|
@@ -93,7 +93,8 @@ describe Bioinform::MotifModel::PWM do
|
|
93
93
|
[-1, 1, 1, 1, 12,22,32,42,52,62, 702,802,902,1002, 10002 ]] }
|
94
94
|
specify { expect(pwm.discreted(1).alphabet).to eq Bioinform::IUPACAlphabet}
|
95
95
|
|
96
|
-
|
96
|
+
let(:iupac_converter) { Bioinform::ConversionAlgorithms::PWM2IupacPWMConverter.new }
|
97
|
+
specify { expect{ iupac_converter.convert(pwm) }.to raise_error(Bioinform::Error) }
|
97
98
|
end
|
98
99
|
|
99
100
|
describe '.from_string' do
|
@@ -4,7 +4,7 @@ require 'bioinform/formatters/consensus_formatter'
|
|
4
4
|
describe Bioinform::ConsensusFormatter do
|
5
5
|
let(:pm) { Bioinform::MotifModel::PM.new([[10,30,10,28], [30,16,16,16], [12,30,10,26], [26,27,27,1]]) }
|
6
6
|
|
7
|
-
specify('.new without a block raises error') { expect{ Bioinform::ConsensusFormatter.new }.to raise_error
|
7
|
+
specify('.new without a block raises error') { expect{ Bioinform::ConsensusFormatter.new }.to raise_error(Bioinform::Error) }
|
8
8
|
|
9
9
|
context 'custom formatter' do
|
10
10
|
let(:formatter){ Bioinform::ConsensusFormatter.new{|pos, el, ind| (pos.max - el) < pos.max * 0.1 } }
|
@@ -24,7 +24,7 @@ describe Bioinform::MotifFormatter do
|
|
24
24
|
end
|
25
25
|
context 'with with_name equal to true' do
|
26
26
|
let(:formatter) { Bioinform::MotifFormatter.new(with_name: true) }
|
27
|
-
specify { expect{ formatter.format(motif) }.to raise_error
|
27
|
+
specify { expect{ formatter.format(motif) }.to raise_error(Bioinform::Error) }
|
28
28
|
specify { expect( formatter.format(motif.named('')) ).to eq ">\n" +
|
29
29
|
default_matrix_string }
|
30
30
|
specify { expect( formatter.format(motif.named('Stub name')) ).to eq ">Stub name\n" +
|
@@ -38,7 +38,7 @@ describe Bioinform::MotifFormatter do
|
|
38
38
|
default_matrix_string }
|
39
39
|
end
|
40
40
|
context 'with with_name value different from true/false/:auto' do
|
41
|
-
specify{ expect { Bioinform::MotifFormatter.new(with_name: :somewhat) }.to raise_error
|
41
|
+
specify{ expect { Bioinform::MotifFormatter.new(with_name: :somewhat) }.to raise_error(Bioinform::Error) }
|
42
42
|
end
|
43
43
|
|
44
44
|
context 'with nucleotides_in :columns' do
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'bioinform/parsers/matrix_parser'
|
2
2
|
|
3
3
|
describe Bioinform::MatrixParser do
|
4
|
-
specify { expect{ Bioinform::MatrixParser.new(nucleotides_in: :somewhat) }.to raise_error
|
4
|
+
specify { expect{ Bioinform::MatrixParser.new(nucleotides_in: :somewhat) }.to raise_error(Bioinform::Error) }
|
5
5
|
|
6
6
|
context 'with default options' do
|
7
7
|
subject(:parser) { Bioinform::MatrixParser.new }
|
@@ -40,9 +40,9 @@ describe Bioinform::MatrixParser do
|
|
40
40
|
let(:input_not_allowed_2) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
41
41
|
let(:input_not_allowed_3) {"##01\t1\t2\t3\t4\n" + "##02\t11\t12\t13\t14" }
|
42
42
|
specify { expect( parser.parse!(input_allowed) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
43
|
-
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error
|
44
|
-
specify { expect{ parser.parse!(input_not_allowed_2) }.to raise_error
|
45
|
-
specify { expect{ parser.parse!(input_not_allowed_3) }.to raise_error
|
43
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error(Bioinform::Error) }
|
44
|
+
specify { expect{ parser.parse!(input_not_allowed_2) }.to raise_error(Bioinform::Error) }
|
45
|
+
specify { expect{ parser.parse!(input_not_allowed_3) }.to raise_error(Bioinform::Error) }
|
46
46
|
end
|
47
47
|
context 'with has_name equal to :auto parser can either have name or not' do
|
48
48
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: :auto) }
|
@@ -51,7 +51,7 @@ describe Bioinform::MatrixParser do
|
|
51
51
|
let(:input_with_bad_name) {"-Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
52
52
|
specify { expect( parser.parse!(input_without_name) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
53
53
|
specify { expect( parser.parse!(input_with_name) ).to eq( {name: 'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
54
|
-
specify { expect{ parser.parse!(input_with_bad_name) }.to raise_error
|
54
|
+
specify { expect{ parser.parse!(input_with_bad_name) }.to raise_error(Bioinform::Error) }
|
55
55
|
end
|
56
56
|
context 'parser having name and header row' do
|
57
57
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: true, has_header_row: true) }
|
@@ -62,7 +62,7 @@ describe Bioinform::MatrixParser do
|
|
62
62
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: false, has_header_row: true) }
|
63
63
|
let(:input) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
64
64
|
specify { expect( parser.parse!(input) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
65
|
-
specify { expect{ parser.parse!("Motif name\n" + input) }.to raise_error
|
65
|
+
specify { expect{ parser.parse!("Motif name\n" + input) }.to raise_error(Bioinform::Error) }
|
66
66
|
end
|
67
67
|
context 'parser having header column' do
|
68
68
|
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_header_column: true) }
|
@@ -101,7 +101,7 @@ describe Bioinform::MatrixParser do
|
|
101
101
|
let(:input_allowed) {"NA>Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
102
102
|
let(:input_not_allowed) {"Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
103
103
|
specify { expect( parser.parse!(input_allowed) ).to eq( {name: "Motif name\tother info", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
104
|
-
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error
|
104
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error(Bioinform::Error) }
|
105
105
|
end
|
106
106
|
|
107
107
|
context 'parser reducing number of nucleotides' do
|
@@ -127,7 +127,7 @@ describe Bioinform::MatrixParser do
|
|
127
127
|
context 'parser which hasn\'t enough number of nucleotides' do
|
128
128
|
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, fix_nucleotides_number: 4) }
|
129
129
|
let(:input) {">PM name\n" + "1\t2\t3\n" + "11\t12\t13" }
|
130
|
-
specify { expect{ parser.parse!(input) }.to raise_error
|
130
|
+
specify { expect{ parser.parse!(input) }.to raise_error(Bioinform::Error) }
|
131
131
|
end
|
132
132
|
|
133
133
|
context 'parser with auto transposition' do
|
data/spec/spec_helper_source.rb
CHANGED
@@ -37,7 +37,7 @@ def parser_specs(parser, good_cases, bad_cases)
|
|
37
37
|
|
38
38
|
bad_cases.each do |case_description, input|
|
39
39
|
it "should raise an exception on parsing #{case_description}" do
|
40
|
-
expect{ parser.parse!(input[:input]) }.to raise_error
|
40
|
+
expect{ parser.parse!(input[:input]) }.to raise_error(Bioinform::Error)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Vorontsov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A bunch of useful classes for bioinformatics
|
14
14
|
email:
|
@@ -22,6 +22,7 @@ extra_rdoc_files: []
|
|
22
22
|
files:
|
23
23
|
- ".gitignore"
|
24
24
|
- Gemfile
|
25
|
+
- Gemfile.lock
|
25
26
|
- Guardfile
|
26
27
|
- LICENSE
|
27
28
|
- README.md
|
@@ -62,6 +63,7 @@ files:
|
|
62
63
|
- lib/bioinform/parsers/motif_splitter.rb
|
63
64
|
- lib/bioinform/support.rb
|
64
65
|
- lib/bioinform/support/strip_doc.rb
|
66
|
+
- lib/bioinform/validator.rb
|
65
67
|
- lib/bioinform/version.rb
|
66
68
|
- spec/alphabet_spec.rb
|
67
69
|
- spec/background_spec.rb
|
@@ -120,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
122
|
version: '0'
|
121
123
|
requirements: []
|
122
124
|
rubyforge_project:
|
123
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.5.1
|
124
126
|
signing_key:
|
125
127
|
specification_version: 4
|
126
128
|
summary: Classes for work with different input formats of positional matrices and
|