bioinform 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/bioinform.gemspec +21 -0
- data/lib/bioinform.rb +7 -0
- data/lib/bioinform/data_models.rb +6 -0
- data/lib/bioinform/data_models/iupac.rb +22 -0
- data/lib/bioinform/data_models/iupac_word.rb +27 -0
- data/lib/bioinform/data_models/positional_count_matrix.rb +26 -0
- data/lib/bioinform/data_models/positional_matrix.rb +80 -0
- data/lib/bioinform/data_models/positional_probability_matrix.rb +17 -0
- data/lib/bioinform/data_models/positional_weight_matrix.rb +3 -0
- data/lib/bioinform/support.rb +7 -0
- data/lib/bioinform/support/callable_symbol.rb +11 -0
- data/lib/bioinform/support/curry_except_self.rb +5 -0
- data/lib/bioinform/support/multiline_squish.rb +6 -0
- data/lib/bioinform/support/pmap.rb +10 -0
- data/lib/bioinform/support/ptap.rb +7 -0
- data/lib/bioinform/support/same.rb +12 -0
- data/lib/bioinform/support/yaml_dump_file.rb +5 -0
- data/lib/bioinform/version.rb +3 -0
- data/spec/callable_symbol_spec.rb +37 -0
- data/spec/pmap_test.rb +24 -0
- data/spec/positional_matrix_spec.rb +169 -0
- data/spec/ptap_spec.rb +17 -0
- data/spec/same_spec.rb +19 -0
- metadata +113 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Ilya Vorontsov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Bioinform
|
2
|
+
|
3
|
+
Bioinform is a bunch of classes extracted from daily bioinformatics work. These classes are an attempt to encapsulate loading (parsing) logic for positional matrices in different formats, along with common transformations. It also includes several core class extensions which are particularly useful on Enumerables.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'bioinform'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install bioinform
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Usage is under construction. I don't recommend using this gem for a while: the syntax is about to change to become simpler and more concise. But stay tuned.
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bioinform.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/bioinform/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for bioinform: author metadata, packaged files and
# dependencies. The version comes from Bioinform::VERSION, loaded by the
# `require` at the top of this file.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{A bunch of useful classes for bioinformatics}
  gem.summary       = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
  gem.homepage      = ""

  # The packaged file list is whatever git tracks, so the gem has to be built
  # from inside the repository.
  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "bioinform"
  gem.require_paths = ["lib"]
  gem.version       = Bioinform::VERSION

  # The library requires 'active_support/core_ext/...', which is shipped by the
  # gem named `activesupport` (no underscore). Depending on `active_support`
  # would install an unrelated gem and leave those requires unresolved.
  gem.add_dependency('activesupport', '~> 3.0.0')

  gem.add_development_dependency "rspec", "~> 2.0"
end
|
data/lib/bioinform.rb
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
require 'bioinform/data_models/positional_matrix'
|
2
|
+
require 'bioinform/data_models/positional_count_matrix'
|
3
|
+
require 'bioinform/data_models/positional_weight_matrix'
|
4
|
+
require 'bioinform/data_models/positional_probability_matrix'
|
5
|
+
#require 'bioinform/data_models/iupac_word'
|
6
|
+
#require 'bioinform/data_models/iupac' #require 'bioinform/data_models/iupac_vocabulary'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# A list of IUPAC words that all have the same length.
#
# Accepted input:
# * Array     - each element is passed to IupacWord.new;
# * String    - one word per line ("\r\n" line endings are accepted);
# * IupacWord - wrapped into a single-element list.
#
# Raises ArgumentError for any other input type or when the words differ in
# length.
class Iupac
  attr_reader :words

  def initialize(input)
    if input.is_a?(Array)
      @words = input.map { |item| IupacWord.new(item) }
    elsif input.is_a?(String)
      lines = input.gsub("\r\n", "\n").split("\n")
      @words = lines.map { |line| IupacWord.new(line) }
    elsif input.is_a?(IupacWord)
      @words = [input]
    else
      raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
    end

    same_length = @words.same?(&:length)
    raise ArgumentError, 'IUPAC words should be of the same length' unless same_length
  end

  # Sum of the per-word count matrices (nil when there are no words).
  def to_pcm
    per_word = @words.map { |iupac_word| iupac_word.to_pcm }
    per_word.inject { |total, pcm| total + pcm }
  end

  # Weight matrix derived from the summed count matrix.
  def to_pwm
    pcm = to_pcm
    pcm.to_pwm
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# A single word written in the IUPAC nucleotide alphabet.
class IupacWord
  # Every letter allowed in a word.
  IupacLetters = %w{A C G T R Y K M S W B D H V N}

  # Set of nucleotides (as a sorted string) => IUPAC letter.
  Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}

  # IUPAC letter => set of nucleotides it stands for.
  Decode = Code.invert

  # IUPAC letter => [A, C, G, T] shares: each nucleotide covered by the letter
  # gets 1.0 / (number of covered nucleotides), the others get 0.0.
  LetterCode = Decode.each_with_object({}) do |(letter, nucleotides), table|
    table[letter] = %w{A C G T}.map do |nucleotide|
      (nucleotides.include?(nucleotide) ? 1.0 : 0.0) / nucleotides.size
    end
  end

  attr_reader :word
  attr_accessor :weight

  # Raises RuntimeError when the word has a letter outside IupacLetters.
  def initialize(word)
    only_iupac = word.each_char.all? { |char| IupacLetters.include?(char) }
    raise "Non-IUPAC letter in a word #{word}" unless only_iupac
    @word = word
    @weight = 1
  end

  def length
    word.length
  end

  # Count matrix with one LetterCode row per letter of the word.
  def to_pcm
    rows = @word.each_char.map { |char| LetterCode[char] }
    PositionalCountMatrix.new(rows)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Positional count matrix (PCM): a PositionalMatrix whose elements are
# non-negative counts and whose rows all sum to (approximately) the same total.
#
# +background+ holds relative background weights for A, C, G, T and
# +pseudocount+ the pseudocount used by #to_pwm; both can be reassigned after
# construction.
class PositionalCountMatrix < PositionalMatrix
  attr_accessor :pseudocount, :background

  # Accepts the same arguments as PositionalMatrix#initialize.
  #
  # Raises ArgumentError when an element is negative, when the total count is
  # not positive, or when the row sums differ by 1% of the total count or more.
  def initialize(*args)
    super
    raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
    # A zero total used to slip past this check and was then reported as
    # "different summary count"; reject it here with the matching message.
    raise ArgumentError, 'PCM summary count is zero or negative' unless count > 0
    # summary counts can slightly differ from each other due to floating point precision
    unless @matrix.all?{|position| (position.inject(:+) - count).abs < 0.01*count }
      raise ArgumentError, 'PCM has different summary count at each position'
    end
    @background = [1.0, 1.0, 1.0, 1.0]
    @pseudocount = 1.0
  end

  # Total count, taken from the first position and memoized.
  def count
    @count ||= @matrix.first.inject(:+)
  end

  # Converts counts to log-odds weights:
  #
  #   log((count_i + q_i * pseudocount) / (q_i * (total + pseudocount)))
  #
  # where q_i is the background weight of the nucleotide normalized to sum to 1.
  # The numerator is parenthesized explicitly: without the parentheses only the
  # pseudocount term was divided and the raw count was added afterwards.
  def to_pwm
    background_total = background.inject(:+)
    frequencies = background.map { |el| el * 1.0 / background_total }
    smoothed_total = count + pseudocount
    weights = matrix.map do |position|
      position.each_with_index.map do |el, index|
        Math.log((el + frequencies[index] * pseudocount) / (frequencies[index] * smoothed_total))
      end
    end
    PositionalWeightMatrix.new(weights)
  end

  # Element-wise sum of two PCMs of the same length.
  # Raises ArgumentError when the lengths differ.
  def +(another_pcm)
    raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
    summed = matrix.zip(another_pcm.matrix).map do |own_position, other_position|
      own_position.zip(other_position).map { |own, other| own + other }
    end
    PositionalCountMatrix.new(summed)
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
2
|
+
require 'bioinform/support/multiline_squish'
|
3
|
+
require 'bioinform/support/same'
|
4
|
+
require 'bioinform/support/pmap'
|
5
|
+
|
6
|
+
# Matrix with one row per motif position and four columns (A, C, G, T).
#
# Can be built from:
# * a String, handed to +parser+ (an object whose +parse+ returns a match with
#   +:name+ and +:matrix+ groups, or nil when the text is not recognized);
# * a Hash with A, C, G, T keys (symbols or strings), each holding the values
#   of that nucleotide along the motif;
# * an Array of rows, each row being an Array or a Hash with A/C/G/T keys.
#
# String and Array input given as 4 rows of N columns is transposed into N rows
# of 4 columns; a 4x4 input of that kind is taken as already having positions
# in rows. Hash input is always transposed, since its rows are nucleotides.
class PositionalMatrix
  # Parser for plain text: an optional name line (optionally starting with
  # '>') followed by rows of space-separated numbers. Input is first passed
  # through String#multiline_squish (defined in bioinform/support;
  # presumably normalizes whitespace - confirm there).
  module DefaultParser
    number_pat = '[+-]?\d+(\.\d+)?'
    row_pat = "(#{number_pat} )*#{number_pat}"
    name_pat = '>? ?(?<name>[\w.-]+)\n'
    matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    Pattern = /\A(#{name_pat})?#{matrix_pat}\z/

    # Returns a MatchData with :name and :matrix groups, or nil.
    def self.parse(input)
      input.multiline_squish.match(Pattern)
    end
  end

  # Parser for the layout used in the FANTOM test fixture: a "NA <name>" line,
  # a header row and rows that start with a position label.
  module FantomParser
    number_pat = '[+-]?\d+(\.\d+)?'
    row_pat = "(#{number_pat} )*#{number_pat}"
    matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/

    # Drops the first token of every line except the first one and keeps at
    # most the next four tokens. Empty input and rows without tokens yield
    # empty text instead of raising NoMethodError, so malformed input simply
    # fails to match Pattern.
    def self.trim_first_position(input)
      first_line, *rows = input.split("\n")
      trimmed = rows.map { |row| (row.split(' ')[1..4] || []).join(' ') }
      ([first_line] + trimmed).join("\n")
    end

    # Returns a MatchData with :name and :matrix groups, or nil.
    def self.parse(input)
      trim_first_position(input.multiline_squish).match(Pattern)
    end
  end

  attr_reader :name, :matrix

  # Raises ArgumentError when the input type is not supported, the text is not
  # recognized by the parser, a nucleotide key is missing, the input is empty,
  # rows differ in size, or neither dimension has size 4.
  def initialize(input,parser = DefaultParser)
    rows_are_nucleotides = false
    case input
    when String
      match = parser.parse(input)
      raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
      @name = match[:name]
      @matrix = match[:matrix].split("\n").map{|row| row.split.map(&:to_f)}
    when Hash
      @matrix = nucleotide_values(input)
      # Orientation is known here, so a motif of exactly 4 positions must be
      # transposed as well (the size-based guess below would leave it as is).
      rows_are_nucleotides = true
    when Array
      @matrix = input.map do |pos|
        case pos
        when Array then pos
        when Hash then nucleotide_values(pos)
        else raise ArgumentError, 'Unknown type of argument inner dimension'
        end
      end
    else
      raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
    end
    raise ArgumentError, 'Input matrix is empty' if @matrix.empty?
    raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same?(&:size)
    unless @matrix.size == 4 || @matrix.first.size == 4
      raise ArgumentError, 'Neither rows nor columns of input have size 4'
    end
    @matrix = @matrix.transpose if rows_are_nucleotides || @matrix.first.size != 4
  end

  # Number of positions.
  def size
    @matrix.size
  end
  alias_method :length, :size

  # Tab-separated rows, preceded by the name line when +with_name+ is true and
  # the matrix has a name.
  def to_s(with_name = true)
    mat_str = @matrix.map { |position| position.join("\t") }.join("\n")
    (with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
  end

  # Like #to_s, but with a nucleotide header and elements rounded to three
  # digits and right-aligned in six-character cells.
  def pretty_string(with_name = true)
    header = " A C G T \n"
    mat_str = @matrix.map{|position| position.map{|el| el.round(3).to_s.rjust(6)}.join(' ') }.join("\n")
    (with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
  end

  # Hash with indifferent access: nucleotide => values along the motif.
  def to_hash
    {A: @matrix.map{|pos| pos[0]}, C: @matrix.map{|pos| pos[1]}, G: @matrix.map{|pos| pos[2]}, T: @matrix.map{|pos| pos[3]}}.with_indifferent_access
  end

  private

  # Values stored under A, C, G, T (symbol or string keys), in that order.
  def nucleotide_values(hash)
    indifferent = hash.with_indifferent_access
    values = [indifferent[:A], indifferent[:C], indifferent[:G], indifferent[:T]]
    raise ArgumentError, 'Hash input should have values for each of A, C, G and T' if values.any?(&:nil?)
    values
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Positional probability matrix (PPM): a PositionalMatrix whose elements are
# non-negative and whose rows each sum to 1.0 (within 0.01).
#
# +count+ is the total count used to scale probabilities back into counts; it
# is not set by the constructor and has to be assigned before #to_pcm/#to_pwm.
class PositionalProbabilityMatrix < PositionalMatrix
  attr_accessor :count

  # Accepts the same arguments as PositionalMatrix#initialize; extra arguments
  # (such as a parser) are forwarded unchanged.
  #
  # Raises ArgumentError when an element is negative or a row sum differs from
  # 1.0 by 0.01 or more.
  def initialize(input_string, *args)
    super
    raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all?{|position| position.all?{|el| el>=0 }}
    # summary counts can slightly differ from each other due to floating point precision
    unless @matrix.all?{|position| (position.inject(:+) - 1.0).abs < 0.01 }
      raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
    end
  end

  # Count matrix obtained by multiplying every probability by +count+.
  # Raises ArgumentError when +count+ has not been assigned, instead of
  # failing inside the multiplication with a TypeError.
  def to_pcm
    raise ArgumentError, 'PPM count should be specified before conversion to PCM' if count.nil?
    PositionalCountMatrix.new(matrix.map { |position| position.map { |el| el * count } })
  end

  # Weight matrix derived through the count matrix.
  def to_pwm
    to_pcm.to_pwm
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'bioinform/support/curry_except_self'
|
2
|
+
require 'bioinform/support/callable_symbol'
|
3
|
+
require 'bioinform/support/pmap'
|
4
|
+
require 'bioinform/support/ptap'
|
5
|
+
require 'bioinform/support/same'
|
6
|
+
require 'bioinform/support/yaml_dump_file'
|
7
|
+
require 'bioinform/support/multiline_squish'
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'bioinform/support/curry_except_self'
|
2
|
+
|
3
|
+
class Symbol
  # Makes `:name.(arg, ...)` usable as a block argument (`&:name.(arg)`).
  #
  # Returns a fresh object that remembers the symbol (@sym) and the arguments
  # (@args); its #to_proc builds the proc by calling Proc#curry_except_self
  # (from bioinform/support/curry_except_self) on the symbol's proc with those
  # arguments.
  def call(*args)
    carrier = Object.new
    carrier.instance_variable_set(:@sym, self)
    carrier.instance_variable_set(:@args, args)
    def carrier.to_proc
      @sym.to_proc.curry_except_self(*@args)
    end
    carrier
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'bioinform/support/callable_symbol'
|
3
|
+
|
4
|
+
# Tests for Symbol#call: `:method.(args)` used as a block argument.
#
# The class used to be called TestEnumerablePmap, the same name as the class in
# pmap_test.rb, and both defined test_pmap_bang_without_parameters; loaded
# together, one definition silently replaced the other. The class and the
# map-based tests are named after what they exercise.
class TestCallableSymbol < Test::Unit::TestCase
  def test_with_tap
    assert_equal ['abc','def','ghi'], ['abc','','','def','ghi'].tap(&:delete.(''))

    # tap returns the receiver itself, not an equal copy
    x = ['abc','','','def','ghi']
    assert_equal false, ['abc','def','ghi'].equal?(x.tap(&:delete.('')))

    x = ['abc','','','def','ghi']
    assert_equal true, x.equal?(x.tap(&:delete.('')))

    assert_equal ['abc','','','def','ghi'], ['abc','','','def','ghi'].tap(&:to_s)
  end

  def test_map_bang_without_parameters
    x = [1,2,3]
    assert_equal ['1', '2', '3'], x.map!(&:to_s)
    assert_equal ['1', '2', '3'], x
  end

  def test_with_map_bang_with_parameters
    y = [1,2,3]
    assert_equal ['1', '10', '11'], y.map!(&:to_s.(2))
    assert_equal ['1', '10', '11'], y
  end

  def test_with_map_without_bang
    x = [1,2,3]
    assert_equal ['1', '10', '11'], x.map(&:to_s.(2))
    assert_equal [1, 2, 3], x
  end

  def test_one_more_with_map
    assert_equal "1 2 3\n4 5 6", [[1,2,3],[4,5,6]].map(&:join.(' ')).join("\n")
    assert_equal ['1', '10', '11', '100', '101'], [1,2,3,4,5].map(&:to_s.(2))
  end
end
|
data/spec/pmap_test.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'bioinform/support/pmap'
|
3
|
+
|
4
|
+
# Tests for pmap / pmap!: map variants that pass extra arguments to the block
# (e.g. `pmap(2, &:to_s)` renders each number in base 2).
#
# assert_equal takes (expected, actual); the arguments are given in that order
# so that failure messages label the two values correctly.
class TestEnumerablePmap < Test::Unit::TestCase
  def test_pmap_bang_without_parameters
    x = [1,2,3]
    assert_equal ['1', '2', '3'], x.pmap!(&:to_s)
    assert_equal ['1', '2', '3'], x
  end

  def test_pmap_bang_with_parameters
    y = [1,2,3]
    assert_equal ['1', '10', '11'], y.pmap!(2, &:to_s)
    assert_equal ['1', '10', '11'], y
  end

  def test_pmap_without_bang
    x = [1,2,3]
    assert_equal ['1', '10', '11'], x.pmap(2, &:to_s)
    # the receiver must stay untouched
    assert_equal [1, 2, 3], x
  end

  def test_one_more_pmap
    assert_equal "1 2 3\n4 5 6", [[1,2,3],[4,5,6]].pmap(' ',&:join).join("\n")
    assert_equal ['1', '10', '11', '100', '101'], [1,2,3,4,5].pmap(2,&:to_s)
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'bioinform/data_models/positional_matrix'
|
3
|
+
|
4
|
+
# Tests for PositionalMatrix: parsing of text input, Hash input, size checks,
# serialization and the FANTOM parser.
#
# assert_equal is called as (expected, actual) so failure messages label the
# values correctly; the fixture shared by several tests lives in
# #named_matrix_input.
class PositionalMatrixTest < Test::Unit::TestCase
  def test_input_has_name
    m = PositionalMatrix.new(named_matrix_input)
    assert_equal [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]], m.matrix
    assert_equal 'Testmatrix_1', m.name
  end

  def test_input_has_tabs_and_multiple_spaces_and_carriage_returns_at_eol
    m = PositionalMatrix.new <<-EOF
1.23\t4.56 1.2 1.0\r
1.45 1.23 1.48 1.9\r
-5.6 7.8 4.56 10.1
4.13 -15.6\t\t8.7 0.0
2.2 3.3 4.4 5.5
    EOF
    assert_equal [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.8, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]], m.matrix
  end

  def test_input_has_finishing_and_leading_newlines
    m = PositionalMatrix.new <<-EOF

> Testmatrix_1
1.23 4.56 1.2 1.0
1.45 1.23 1.48 1.9
-5.6 7 4.56 10.1
4.13 -15.6 8.7 0.0
2.2 3.3 4.4 5.5

    EOF
    assert_equal [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]], m.matrix
  end

  def test_input_has_no_name
    m = PositionalMatrix.new <<-EOF
1.23 4.56 1.2 1.0
1.45 1.23 1.48 1.9
-5.6 7 4.56 10.1
4.13 -15.6 8.7 0.0
2.2 3.3 4.4 5.5
    EOF
    assert_equal [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]], m.matrix
    assert_nil m.name
  end

  # Four rows of six columns: rows are nucleotides, so the matrix is transposed.
  def test_input_positions_as_rows
    m = PositionalMatrix.new <<-EOF
Testmatrix-2
1.23 4.56 1.2 1.0 78 12.3
1.45 1.23 1.48 1.9 10.1 12.0
-5.6 7 4.56 10.1 4 12
4.13 -15.6 8.7 0.0 1.1 5
    EOF
    assert_equal [[1.23, 1.45, -5.6, 4.13], [4.56, 1.23, 7.0, -15.6], [1.2, 1.48, 4.56, 8.7], [1.0, 1.9, 10.1, 0.0],[78, 10.1, 4.0, 1.1], [12.3, 12.0, 12.0, 5.0]], m.matrix
    assert_equal 'Testmatrix-2', m.name
  end

  def test_fails_on_nonnumeric_data
    assert_raise ArgumentError do
      PositionalMatrix.new <<-EOF
1.23ss 4.56ww 1.2 1.0
1.45zz 1.23 1.48 1.9
-5.6 7 4.56 10.1
4.13 -15.6 8.7 0.0
2.2 3.3 4.4 5.5
      EOF
    end
  end

  def test_fails_on_different_row_size
    assert_raise ArgumentError do
      PositionalMatrix.new <<-EOF
> Testmatrix_1
1.23 4.56 1.2 1.0
1.45 1.23 1.48
-5.6 7 4.56 10.1
4.13 -15.6 8.7 0.0
2.2 3.3 4.4 5.5
      EOF
    end
  end

  def test_fails_if_either_row_nor_col_has_size_4
    assert_raise ArgumentError do
      PositionalMatrix.new <<-EOF
1 2 3 4 5
2 2 -2 2 2
3 3 3 3 3
4 -4 4 -4 -4
5 5 -5 -5 5
      EOF
    end
  end

  def test_to_s
    m = PositionalMatrix.new(named_matrix_input)
    assert_equal "Testmatrix_1\n1.23\t4.56\t1.2\t1.0\n1.45\t1.23\t1.48\t1.9\n-5.6\t7.0\t4.56\t10.1\n4.13\t-15.6\t8.7\t0.0\n2.2\t3.3\t4.4\t5.5", m.to_s
    assert_equal "1.23\t4.56\t1.2\t1.0\n1.45\t1.23\t1.48\t1.9\n-5.6\t7.0\t4.56\t10.1\n4.13\t-15.6\t8.7\t0.0\n2.2\t3.3\t4.4\t5.5", m.to_s(false)
  end

  def test_pretty_string
    m = PositionalMatrix.new(named_matrix_input)
    assert_equal "Testmatrix_1\n A C G T \n 1.23 4.56 1.2 1.0\n 1.45 1.23 1.48 1.9\n -5.6 7.0 4.56 10.1\n 4.13 -15.6 8.7 0.0\n 2.2 3.3 4.4 5.5", m.pretty_string
    assert_equal " A C G T \n 1.23 4.56 1.2 1.0\n 1.45 1.23 1.48 1.9\n -5.6 7.0 4.56 10.1\n 4.13 -15.6 8.7 0.0\n 2.2 3.3 4.4 5.5", m.pretty_string(false)
  end

  def test_to_hash
    m = PositionalMatrix.new(named_matrix_input)
    assert_equal({A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7.0, -15.6, 3.3], G:[1.2, 1.48, 4.56, 8.7, 4.4], T:[1.0, 1.9, 10.1, 0.0, 5.5]}.with_indifferent_access, m.to_hash)
  end

  # Symbol and string keys may be mixed in Hash input.
  def test_hash_input
    m = PositionalMatrix.new(A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7, -15.6, 3.3],'G' => [1.2, 1.48, 4.56, 8.7, 4.4], 'T'=>[1.0, 1.9, 10.1, 0.0, 5.5])
    assert_equal [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]], m.matrix
  end

  def test_size
    m = PositionalMatrix.new(A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7, -15.6, 3.3],'G' => [1.2, 1.48, 4.56, 8.7, 4.4], 'T'=>[1.0, 1.9, 10.1, 0.0, 5.5])
    assert_equal 5, m.size
    assert_equal m.size, m.length
  end

  def test_fantom_parser
    input = <<-EOS
NA motif_CTNCAG
P0 A C G T
P1 0 1878368 0 0
P2 0 0 0 1878368
P3 469592 469592 469592 469592
P4 0 1878368 0 0
P5 1878368 0 0 0
P6 0 0 1878368 0
    EOS
    m = PositionalMatrix.new input, PositionalMatrix::FantomParser
    assert_equal 'motif_CTNCAG', m.name
    assert_equal [[0,1878368,0,0],[0,0,0,1878368],[469592,469592,469592,469592],[0,1878368,0,0],[1878368,0,0,0],[0,0,1878368,0]], m.matrix
  end

  private

  # Named five-position matrix shared by the parsing and serialization tests.
  def named_matrix_input
    <<-EOF
> Testmatrix_1
1.23 4.56 1.2 1.0
1.45 1.23 1.48 1.9
-5.6 7 4.56 10.1
4.13 -15.6 8.7 0.0
2.2 3.3 4.4 5.5
    EOF
  end
end
|
data/spec/ptap_spec.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'bioinform/support/ptap'
|
3
|
+
|
4
|
+
# Tests for ptap: a tap variant that passes extra arguments to the block.
#
# The class used to be called TestEnumerablePmap, a name already taken by the
# pmap tests; with the whole suite loaded the cases were merged into one class.
# An unused local assignment before the last assertion is dropped as well.
class TestEnumerablePtap < Test::Unit::TestCase
  def test_ptap
    assert_equal ['abc','def','ghi'], ['abc','','','def','ghi'].ptap('',&:delete)

    # ptap returns the receiver itself, not an equal copy
    x = ['abc','','','def','ghi']
    assert_equal false, ['abc','def','ghi'].equal?(x.ptap('',&:delete))

    x = ['abc','','','def','ghi']
    assert_equal true, x.equal?(x.ptap('',&:delete))

    assert_equal ['abc','','','def','ghi'], ['abc','','','def','ghi'].ptap(&:to_s)
  end
end
|
data/spec/same_spec.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'bioinform/support/same'
|
3
|
+
|
4
|
+
# Tests for Enumerable#same?: checks whether all elements (or the block's
# results for them) are equal.
class TestEnumerableSame < Test::Unit::TestCase
  def test_same
    # with a predicate block
    all_odd = [1, 3, 9, 7]
    all_even = [4, 8, 2, 2]
    mixed_parity = [1, 8, 3, 2]
    assert_equal(true, all_odd.same?(&:even?))
    assert_equal(true, all_even.same?(&:even?))
    assert_equal(false, mixed_parity.same?(&:even?))

    # with a block returning a value
    assert_equal(true, ['cat', 'dog', 'rat'].same?(&:length))
    assert_equal(false, ['cat', 'dog', 'rabbit'].same?(&:length))

    # without a block the elements themselves are compared
    assert_equal(true, ['cat', 'cat', 'cat'].same?)
    assert_equal(false, ['cat', 'dog', 'rat'].same?)

    # an empty collection counts as "all the same"
    nothing = []
    assert_equal(true, nothing.same?(&:length))
    assert_equal(true, nothing.same?)
  end
end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bioinform
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ilya Vorontsov
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: active_support
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 3.0.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 3.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '2.0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '2.0'
|
46
|
+
description: A bunch of useful classes for bioinformatics
|
47
|
+
email:
|
48
|
+
- prijutme4ty@gmail.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- .gitignore
|
54
|
+
- Gemfile
|
55
|
+
- LICENSE
|
56
|
+
- README.md
|
57
|
+
- Rakefile
|
58
|
+
- bioinform.gemspec
|
59
|
+
- lib/bioinform.rb
|
60
|
+
- lib/bioinform/data_models.rb
|
61
|
+
- lib/bioinform/data_models/iupac.rb
|
62
|
+
- lib/bioinform/data_models/iupac_word.rb
|
63
|
+
- lib/bioinform/data_models/positional_count_matrix.rb
|
64
|
+
- lib/bioinform/data_models/positional_matrix.rb
|
65
|
+
- lib/bioinform/data_models/positional_probability_matrix.rb
|
66
|
+
- lib/bioinform/data_models/positional_weight_matrix.rb
|
67
|
+
- lib/bioinform/support.rb
|
68
|
+
- lib/bioinform/support/callable_symbol.rb
|
69
|
+
- lib/bioinform/support/curry_except_self.rb
|
70
|
+
- lib/bioinform/support/multiline_squish.rb
|
71
|
+
- lib/bioinform/support/pmap.rb
|
72
|
+
- lib/bioinform/support/ptap.rb
|
73
|
+
- lib/bioinform/support/same.rb
|
74
|
+
- lib/bioinform/support/yaml_dump_file.rb
|
75
|
+
- lib/bioinform/version.rb
|
76
|
+
- spec/callable_symbol_spec.rb
|
77
|
+
- spec/pmap_test.rb
|
78
|
+
- spec/positional_matrix_spec.rb
|
79
|
+
- spec/ptap_spec.rb
|
80
|
+
- spec/same_spec.rb
|
81
|
+
homepage: ''
|
82
|
+
licenses: []
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.8.24
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Classes for work with different input formats of positional matrices and
|
105
|
+
IUPAC-words and making simple transform and statistics with them. Also module includes
|
106
|
+
several useful extensions for Enumerable module like parametric map and callable
|
107
|
+
symbols
|
108
|
+
test_files:
|
109
|
+
- spec/callable_symbol_spec.rb
|
110
|
+
- spec/pmap_test.rb
|
111
|
+
- spec/positional_matrix_spec.rb
|
112
|
+
- spec/ptap_spec.rb
|
113
|
+
- spec/same_spec.rb
|