bioinform 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in bioinform.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Ilya Vorontsov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Bioinform
2
+
3
+ Bioinform is a bunch of classes extracted from daily bioinformatics work. These classes are an attempt to encapsulate the loading (parsing) logic for positional matrices in different formats, together with common transformations. It also includes several core class extensions which are particularly useful on Enumerables.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'bioinform'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install bioinform
18
+
19
+ ## Usage
20
+
21
+ Usage is under construction. I don't recommend using this gem for a while: the syntax is about to change to a simpler and more concise one. But stay tuned.
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
# Runs every test file under spec/ in its own Ruby process.
#
# Both naming schemes used in this project (*_spec.rb and *_test.rb) are
# picked up. lib/ is put on the load path so the files can
# `require 'bioinform/...'` without the gem being installed. The task aborts
# with a non-zero exit status when any file fails, so a red run is never
# reported as success.
desc 'Spec bioinform library'
task :spec do
  spec_files = Dir.glob('spec/*_{spec,test}.rb').sort
  # The multi-argument form of system bypasses the shell, so unusual file
  # names cannot be misinterpreted as shell syntax.
  failed = spec_files.reject { |spec_file| system('ruby', '-Ilib', spec_file) }
  abort "Failed spec files: #{failed.join(', ')}" unless failed.empty?
end
11
+
12
+ # RSpec::Core::RakeTask.new
data/bioinform.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/bioinform/version', __FILE__)
3
+
4
# Gem specification for bioinform.
#
# The file list is taken from git, so the gem must be built from a checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Ilya Vorontsov"]
  gem.email         = ["prijutme4ty@gmail.com"]
  gem.description   = %q{A bunch of useful classes for bioinformatics}
  gem.summary       = %q{Classes for work with different input formats of positional matrices and IUPAC-words and making simple transform and statistics with them. Also module includes several useful extensions for Enumerable module like parametric map and callable symbols }
  gem.homepage      = ""
  # The LICENSE file shipped with the gem is the MIT license.
  gem.licenses      = ['MIT']

  # Split on newlines only: `git ls-files` prints one path per line, and
  # splitting on arbitrary whitespace would break paths containing spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "bioinform"
  gem.require_paths = ["lib"]
  gem.version       = Bioinform::VERSION

  # The library requires files under active_support/core_ext/...; those are
  # shipped by the gem named "activesupport" (no underscore).
  gem.add_dependency('activesupport', '~> 3.0.0')

  gem.add_development_dependency "rspec", "~> 2.0"
end
data/lib/bioinform.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'bioinform/version'
2
+ require 'bioinform/support'
3
+ require 'bioinform/data_models'
4
+
5
# Top-level namespace of the bioinform gem.
#
# Requiring this file loads the version constant, the core-class extensions
# (bioinform/support) and the positional matrix data models
# (bioinform/data_models) through the requires at the top of the file.
module Bioinform
end
@@ -0,0 +1,6 @@
1
+ require 'bioinform/data_models/positional_matrix'
2
+ require 'bioinform/data_models/positional_count_matrix'
3
+ require 'bioinform/data_models/positional_weight_matrix'
4
+ require 'bioinform/data_models/positional_probability_matrix'
5
+ #require 'bioinform/data_models/iupac_word'
6
+ #require 'bioinform/data_models/iupac' #require 'bioinform/data_models/iupac_vocabulary'
@@ -0,0 +1,22 @@
1
# A list of IUPAC words of equal length.
class Iupac
  # @return [Array<IupacWord>] the words of the list
  attr_reader :words

  # Builds the word list.
  #
  # @param input [Array, String, IupacWord] an array of words (strings or
  #   IupacWord objects), a string with one word per line (LF or CRLF line
  #   endings), or a single IupacWord
  # @raise [ArgumentError] if the input type is not supported or the words
  #   differ in length
  def initialize(input)
    @words = case input
             when Array then input.map { |word| to_iupac_word(word) }
             when String then input.split(/\r?\n/).map { |word| IupacWord.new(word) }
             when IupacWord then [input]
             else raise ArgumentError, 'Can\'t create IUPAC Word List: unknown input type'
             end
    raise ArgumentError, 'IUPAC words should be of the same length' unless @words.same?(&:length)
  end

  # Sums the count matrices of all the words.
  #
  # @return [PositionalCountMatrix, nil] nil when the list is empty
  def to_pcm
    @words.map(&:to_pcm).inject(:+)
  end

  # Converts the summed count matrix into a weight matrix.
  def to_pwm
    to_pcm.to_pwm
  end

  private

  # Passes ready-made IupacWord objects through and wraps anything else, so
  # an Array input may mix strings and words.
  def to_iupac_word(word)
    word.is_a?(IupacWord) ? word : IupacWord.new(word)
  end
end
@@ -0,0 +1,27 @@
1
# A word over the IUPAC nucleotide alphabet (A, C, G, T plus the ambiguity
# codes).
class IupacWord
  # Maps a sorted set of nucleotides to its IUPAC letter.
  Code = {"A" => "A", "C" => "C", "G" => "G", "T" => "T",
          "AG" => "R", "CT" => "Y", "GT" => "K", "AC" => "M",
          "CG" => "S", "AT" => "W", "CGT" => "B", "AGT" => "D", "ACT" => "H", "ACG" => "V", "ACGT" => "N"}.freeze
  # All valid letters, derived from Code so the two cannot drift apart.
  IupacLetters = Code.values.freeze
  # Maps an IUPAC letter back to the nucleotides it stands for.
  Decode = Code.invert.freeze
  # Maps an IUPAC letter to its [A, C, G, T] row: a unit count spread evenly
  # over the nucleotides the letter stands for.
  LetterCode = Hash[Decode.map { |letter, nucleotides|
    [letter, %w{A C G T}.map { |chr| (nucleotides.include?(chr) ? 1.0 : 0.0) / nucleotides.size }.freeze]
  }].freeze

  attr_reader :word
  attr_accessor :weight

  # @param word [String] a word made of upper-case IUPAC letters
  # @raise [RuntimeError] if the word contains a non-IUPAC letter
  def initialize(word)
    raise "Non-IUPAC letter in a word #{word}" unless word.each_char.all? { |letter| Decode.key?(letter) }
    @word = word
    @weight = 1
  end

  # @return [Integer] number of letters in the word
  def length
    word.length
  end

  # Builds a count matrix with one row per letter of the word.
  #
  # Rows are copies of the LetterCode entries, so the matrix never shares
  # (or can modify) the constant's arrays.
  def to_pcm
    matrix = @word.each_char.map { |letter| LetterCode[letter].dup }
    PositionalCountMatrix.new(matrix)
  end
end
@@ -0,0 +1,26 @@
1
# Positional count matrix (PCM): every position holds non-negative counts for
# A, C, G and T, and all positions sum to (nearly) the same total.
class PositionalCountMatrix < PositionalMatrix
  # pseudocount: weight of the background added to every position (default 1.0)
  # background:  relative frequencies of A, C, G, T (default uniform)
  attr_accessor :pseudocount, :background

  # Accepts the same arguments as PositionalMatrix#initialize.
  #
  # @raise [ArgumentError] if an element is negative, the total count is not
  #   positive, or positions have different totals
  def initialize(*args)
    super
    raise ArgumentError, 'PCM has negative matrix elements' unless @matrix.all? { |position| position.all? { |el| el >= 0 } }
    raise ArgumentError, 'PCM summary count is zero or negative' unless count > 0
    # summary counts can slightly differ from each other due to floating point precision
    unless @matrix.all? { |position| (position.inject(:+) - count).abs < 0.01 * count }
      raise ArgumentError, 'PCM has different summary count at each position'
    end
    @background = [1.0, 1.0, 1.0, 1.0]
    @pseudocount = 1.0
  end

  # Total count, taken from the first position and memoized.
  def count
    @count ||= @matrix.first.inject(:+)
  end

  # Converts counts to log-odds weights:
  #
  #   log((count + background * pseudocount) / (background * (total + pseudocount)))
  #
  # where background is normalized to sum to 1.
  #
  # @return [PositionalWeightMatrix]
  def to_pwm
    normalize_coef = @background.inject(:+)
    bckgr = @background.map { |el| el * 1.0 / normalize_coef }
    total = count + @pseudocount
    weights = @matrix.map do |pos|
      pos.each_with_index.map do |el, ind|
        # The numerator is parenthesized so that the pseudocount term is
        # added before the division, not after it.
        Math.log((el + bckgr[ind] * @pseudocount) / (bckgr[ind] * total))
      end
    end
    PositionalWeightMatrix.new(weights)
  end

  # Element-wise sum of two count matrices of the same length.
  #
  # @raise [ArgumentError] if the lengths differ
  def +(another_pcm)
    raise ArgumentError, 'another PCM should be of the same length' unless another_pcm.length == length
    summed = matrix.each_with_index.map do |pos, i|
      pos.each_with_index.map { |el, j| el + another_pcm.matrix[i][j] }
    end
    PositionalCountMatrix.new(summed)
  end
end
@@ -0,0 +1,80 @@
1
+ require 'active_support/core_ext/hash/indifferent_access'
2
+ require 'bioinform/support/multiline_squish'
3
+ require 'bioinform/support/same'
4
+ require 'bioinform/support/pmap'
5
+
6
# A matrix with one row per motif position and four columns (A, C, G, T),
# optionally carrying a name.
class PositionalMatrix
  # Parses plain text: an optional name line (optionally prefixed with '>')
  # followed by rows of space-separated numbers.
  module DefaultParser
    number_pat = '[+-]?\d+(\.\d+)?'
    row_pat = "(#{number_pat} )*#{number_pat}"
    name_pat = '>? ?(?<name>[\w.-]+)\n'
    matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    Pattern = /\A(#{name_pat})?#{matrix_pat}\z/

    # @return [MatchData, nil] with :name and :matrix groups, nil on mismatch
    def self.parse(input)
      input.multiline_squish.match(Pattern)
    end
  end

  # Parses the layout exercised by the FANTOM spec: an "NA <name>" line, a
  # header line and rows whose first token is a position label.
  module FantomParser
    number_pat = '[+-]?\d+(\.\d+)?'
    row_pat = "(#{number_pat} )*#{number_pat}"
    matrix_pat = "(?<matrix>(#{row_pat}\n)*#{row_pat})"
    Pattern = /\ANA (?<name>.+)\nA C G T\n#{matrix_pat}\z/

    # Drops the leading label from every line but the first and keeps the
    # following (at most four) tokens. Empty input yields an empty string,
    # which then simply fails to match the pattern.
    def self.trim_first_position(input)
      header, *rows = input.split("\n")
      ([header] + rows.map { |row| row.split(' ')[1..4].to_a.join(' ') }).join("\n")
    end

    # @return [MatchData, nil] with :name and :matrix groups, nil on mismatch
    def self.parse(input)
      trim_first_position(input.multiline_squish).match(Pattern)
    end
  end

  attr_reader :name, :matrix

  # @param input [String, Hash, Array] text understood by +parser+; a hash
  #   mapping A, C, G, T (string or symbol keys) to arrays of per-position
  #   values; or an array of rows, each an Array or an A/C/G/T hash
  # @param parser [#parse] used for String input only
  # @raise [ArgumentError] if the input cannot be turned into a matrix with
  #   four values per position
  def initialize(input, parser = DefaultParser)
    case input
    when String
      match = parser.parse(input)
      raise ArgumentError, 'Can\'t create positional matrix basing on such input' unless match
      @name = match[:name]
      @matrix = match[:matrix].split("\n").map { |row| row.split.map(&:to_f) }
    when Hash
      # A hash is never ambiguous: each value lists one nucleotide across all
      # positions, so it is always transposed. Guessing the orientation from
      # the shape would leave a four-position motif transposed.
      rows = nucleotide_values(input)
      unless rows.all? { |row| row.is_a?(Array) }
        raise ArgumentError, 'Hash input should map each of A, C, G, T to an array of values'
      end
      raise ArgumentError, 'Input has the different number of columns in each row' unless rows.same?(&:size)
      @matrix = rows.transpose
      return
    when Array
      @matrix = input.map do |pos|
        case pos
        when Array then pos
        when Hash then nucleotide_values(pos)
        else raise ArgumentError, 'Unknown type of argument inner dimension'
        end
      end
    else
      raise ArgumentError, 'Unknown format of input: only Strings, Arrays and hashes\'re available'
    end
    raise ArgumentError, 'Input has the different number of columns in each row' unless @matrix.same?(&:size)
    unless !@matrix.empty? && (@matrix.size == 4 || @matrix.first.size == 4)
      raise ArgumentError, 'Neither rows nor columns of input have size 4 (one value per nucleotide)'
    end
    # Rows that are not 4 wide mean nucleotides were given as rows; flip them
    # so that every row is a position.
    @matrix = @matrix.transpose if @matrix.first.size != 4
  end

  # @return [Integer] number of positions
  def size
    @matrix.size
  end
  alias_method :length, :size

  # Tab-separated rows, one position per line, optionally preceded by the name.
  def to_s(with_name = true)
    mat_str = @matrix.pmap("\t", &:join).join("\n")
    (with_name && @name) ? "#{@name}\n#{mat_str}" : mat_str
  end

  # Human-readable table: values are rounded to 3 digits and right-justified
  # to 6 characters.
  # NOTE(review): the header literal is reproduced exactly as found; confirm
  # that its spacing lines up with the 6-character columns.
  def pretty_string(with_name = true)
    header = " A C G T \n"
    mat_str = @matrix.map { |position| position.map { |el| el.round(3).to_s.rjust(6) }.join(' ') }.join("\n")
    (with_name && @name) ? @name + "\n" + header + mat_str : header + mat_str
  end

  # @return [Hash] per-nucleotide arrays under A, C, G, T with indifferent access
  def to_hash
    {A: @matrix.map { |pos| pos[0] }, C: @matrix.map { |pos| pos[1] }, G: @matrix.map { |pos| pos[2] }, T: @matrix.map { |pos| pos[3] }}.with_indifferent_access
  end

  private

  # Values stored under A, C, G, T (string or symbol keys), in that order.
  def nucleotide_values(hash)
    inp = hash.with_indifferent_access
    [inp[:A], inp[:C], inp[:G], inp[:T]]
  end
end
@@ -0,0 +1,17 @@
1
# Positional probability matrix (PPM): every position holds non-negative
# values for A, C, G and T that sum to (nearly) 1.0.
class PositionalProbabilityMatrix < PositionalMatrix
  # Number of observations the probabilities stand for. It is not set by the
  # constructor and must be assigned before calling #to_pcm or #to_pwm.
  attr_accessor :count

  # Accepts the same arguments as PositionalMatrix#initialize, including the
  # optional parser.
  #
  # @raise [ArgumentError] if an element is negative or a position does not
  #   sum to 1.0
  def initialize(*args)
    super
    raise ArgumentError, 'PPM has negative matrix elements' unless @matrix.all? { |position| position.all? { |el| el >= 0 } }
    # summary probabilities can slightly differ from 1.0 due to floating point precision
    unless @matrix.all? { |position| (position.inject(:+) - 1.0).abs < 0.01 }
      raise ArgumentError, 'PPM has summary probability at some position that differs from 1.0'
    end
  end

  # Scales the probabilities by #count.
  #
  # @return [PositionalCountMatrix]
  # @raise [ArgumentError] if count has not been set
  def to_pcm
    raise ArgumentError, 'PPM count should be set before conversion to PCM' unless @count
    PositionalCountMatrix.new(@matrix.map { |pos| pos.map { |el| el * @count } })
  end

  # Converts to a weight matrix by way of the count matrix.
  def to_pwm
    to_pcm.to_pwm
  end
end
@@ -0,0 +1,3 @@
1
# Positional weight matrix (PWM). Adds nothing to PositionalMatrix yet; it
# exists as the distinct result type of the to_pwm conversions.
class PositionalWeightMatrix < PositionalMatrix
end
@@ -0,0 +1,7 @@
1
+ require 'bioinform/support/curry_except_self'
2
+ require 'bioinform/support/callable_symbol'
3
+ require 'bioinform/support/pmap'
4
+ require 'bioinform/support/ptap'
5
+ require 'bioinform/support/same'
6
+ require 'bioinform/support/yaml_dump_file'
7
+ require 'bioinform/support/multiline_squish'
@@ -0,0 +1,11 @@
1
+ require 'bioinform/support/curry_except_self'
2
+
3
class Symbol
  # Makes symbols "callable" with arguments, so that `&:to_s.(2)` can be used
  # where `&:to_s` cannot pass any argument.
  #
  # @param args arguments to pass to the method named by the symbol, after
  #   the receiver
  # @return [Object] an object whose #to_proc yields a proc that calls the
  #   method on its single argument with +args+
  def call(*args)
    method_name = self
    # The symbol and arguments are captured by the closure, so no state has
    # to be stashed on the wrapper object.
    Object.new.tap do |callable|
      callable.define_singleton_method(:to_proc) do
        method_name.to_proc.curry_except_self(*args)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
class Proc
  # Fixes every argument except the first one.
  #
  # @param args arguments to place after the receiver
  # @return [Proc] a one-argument proc that feeds its argument followed by
  #   +args+ to the curried form of this proc
  def curry_except_self(*args)
    curried = curry
    proc { |receiver| curried.call(receiver, *args) }
  end
end
@@ -0,0 +1,6 @@
1
+ require 'active_support/core_ext/string/filters'
2
class String
  # Squishes every line, then keeps the first run of non-empty lines: leading
  # blank lines are dropped and everything from the first blank line after
  # the content onwards is discarded.
  #
  # @return [String] the kept lines joined with "\n"
  def multiline_squish
    squished_lines = split("\n").map(&:squish)
    content = squished_lines.drop_while(&:empty?)
    first_block = content.take_while { |line| !line.empty? }
    first_block.join("\n")
  end
end
@@ -0,0 +1,10 @@
1
+ require 'bioinform/support/curry_except_self'
2
+
3
module Enumerable
  # In-place parametric map: calls the block on every element with +args+
  # appended and replaces the element with the result. Only available on
  # receivers that implement #map! (such as Array).
  #
  #   [1, 2, 3].pmap!(2, &:to_s)  # => ['1', '10', '11']
  def pmap!(*args, &block)
    map!(&block.curry_except_self(*args))
  end

  # Non-destructive parametric map. Built on #map rather than on a mutated
  # copy, so it works on any Enumerable (ranges, hashes, enumerators), not
  # only on receivers that implement #map!.
  #
  # @return [Array] the mapped values
  def pmap(*args, &block)
    map(&block.curry_except_self(*args))
  end
end
@@ -0,0 +1,7 @@
1
+ require 'bioinform/support/curry_except_self'
2
class Object
  # Parametric tap: calls the block on the receiver with +args+ appended,
  # then returns the receiver itself.
  #
  #   ['a', '', 'b'].ptap('', &:delete)  # => ['a', 'b']
  def ptap(*args, &block)
    with_args = block.curry_except_self(*args)
    tap(&with_args)
  end
end
7
+
@@ -0,0 +1,12 @@
1
module Enumerable
  # Tells whether all elements are equal (by ==) or, with a block, whether
  # the block returns equal values for all of them.
  #
  # Works on any Enumerable: it relies on iteration only, not on #empty?,
  # which Enumerable itself does not provide. The block is called once per
  # element, and iteration stops at the first mismatch.
  #
  #   %w{cat dog rat}.same?(&:length)  # => true
  #   %w{cat dog rat}.same?            # => false
  #
  # @return [Boolean] true for an empty collection
  def same?(&block)
    reference = nil
    seen_first = false
    each_entry do |el|
      value = block ? block.call(el) : el
      if seen_first
        return false unless reference == value
      else
        reference = value
        seen_first = true
      end
    end
    true
  end
end
@@ -0,0 +1,5 @@
1
+ require 'yaml'
2
+
3
# Serializes +obj+ as YAML into the file +filename+, creating or truncating
# it. The counterpart of YAML.load_file.
def YAML.dump_file(obj, filename)
  File.open(filename, 'w') do |yaml_file|
    YAML.dump(obj, yaml_file)
  end
end
@@ -0,0 +1,3 @@
1
module Bioinform
  # Version of the gem, read by bioinform.gemspec. Frozen so that it cannot
  # be modified in place at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,37 @@
1
+ require 'test/unit'
2
+ require 'bioinform/support/callable_symbol'
3
+
4
# Tests for Symbol#call: `&:method.(args)` used with the core tap/map/map!.
#
# The class has its own name: the pmap and ptap test files also define
# TestEnumerablePmap, and test methods with equal names would replace one
# another if the files were loaded into one process.
class TestCallableSymbol < Test::Unit::TestCase
  def test_with_tap
    assert_equal ['abc','def','ghi'], ['abc','','','def','ghi'].tap(&:delete.(''))

    x = ['abc','','','def','ghi']
    assert_equal false, ['abc','def','ghi'].equal?(x.tap(&:delete.('')))

    x = ['abc','','','def','ghi']
    assert_equal true, x.equal?(x.tap(&:delete.('')))

    assert_equal ['abc','','','def','ghi'], ['abc','','','def','ghi'].tap(&:to_s)
  end

  def test_with_map_bang_without_parameters
    x = [1,2,3]
    assert_equal ['1', '2', '3'], x.map!(&:to_s)
    assert_equal ['1', '2', '3'], x
  end

  def test_with_map_bang_with_parameters
    y = [1,2,3]
    assert_equal ['1', '10', '11'], y.map!(&:to_s.(2))
    assert_equal ['1', '10', '11'], y
  end

  def test_with_map_without_bang
    x = [1,2,3]
    assert_equal ['1', '10', '11'], x.map(&:to_s.(2))
    assert_equal [1, 2, 3], x
  end

  def test_one_more_with_map
    assert_equal "1 2 3\n4 5 6", [[1,2,3],[4,5,6]].map(&:join.(' ')).join("\n")
    assert_equal ['1', '10', '11', '100', '101'], [1,2,3,4,5].map(&:to_s.(2))
  end
end
data/spec/pmap_test.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'test/unit'
2
+ require 'bioinform/support/pmap'
3
+
4
# Tests for Enumerable#pmap and #pmap!.
# assert_equal takes (expected, actual); keeping that order makes failure
# messages report the two values under the right labels.
class TestEnumerablePmap < Test::Unit::TestCase
  def test_pmap_bang_without_parameters
    x = [1,2,3]
    assert_equal ['1', '2', '3'], x.pmap!(&:to_s)
    assert_equal ['1', '2', '3'], x
  end

  def test_pmap_bang_with_parameters
    y = [1,2,3]
    assert_equal ['1', '10', '11'], y.pmap!(2, &:to_s)
    assert_equal ['1', '10', '11'], y
  end

  def test_pmap_without_bang
    x = [1,2,3]
    assert_equal ['1', '10', '11'], x.pmap(2, &:to_s)
    assert_equal [1, 2, 3], x
  end

  def test_one_more_pmap
    assert_equal "1 2 3\n4 5 6", [[1,2,3],[4,5,6]].pmap(' ',&:join).join("\n")
    assert_equal ['1', '10', '11', '100', '101'], [1,2,3,4,5].pmap(2,&:to_s)
  end
end
@@ -0,0 +1,169 @@
1
+ require 'test/unit'
2
+ require 'bioinform/data_models/positional_matrix'
3
+
4
+ class PositionalMatrixTest < Test::Unit::TestCase
5
+ def test_input_has_name
6
+ m = PositionalMatrix.new <<-EOF
7
+ > Testmatrix_1
8
+ 1.23 4.56 1.2 1.0
9
+ 1.45 1.23 1.48 1.9
10
+ -5.6 7 4.56 10.1
11
+ 4.13 -15.6 8.7 0.0
12
+ 2.2 3.3 4.4 5.5
13
+ EOF
14
+ assert_equal m.matrix, [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]]
15
+ assert_equal m.name, 'Testmatrix_1'
16
+ end
17
+
18
+ def test_input_has_tabs_and_multiple_spaces_and_carriage_returns_at_eol
19
+ m = PositionalMatrix.new <<-EOF
20
+ 1.23\t4.56 1.2 1.0\r
21
+ 1.45 1.23 1.48 1.9\r
22
+ -5.6 7.8 4.56 10.1
23
+ 4.13 -15.6\t\t8.7 0.0
24
+ 2.2 3.3 4.4 5.5
25
+ EOF
26
+ assert_equal m.matrix, [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.8, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]]
27
+ end
28
+
29
+ def test_input_has_finishing_and_leading_newlines
30
+ m = PositionalMatrix.new <<-EOF
31
+
32
+ > Testmatrix_1
33
+ 1.23 4.56 1.2 1.0
34
+ 1.45 1.23 1.48 1.9
35
+ -5.6 7 4.56 10.1
36
+ 4.13 -15.6 8.7 0.0
37
+ 2.2 3.3 4.4 5.5
38
+
39
+ EOF
40
+ assert_equal m.matrix, [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]]
41
+ end
42
+
43
+ def test_input_has_no_name
44
+ m = PositionalMatrix.new <<-EOF
45
+ 1.23 4.56 1.2 1.0
46
+ 1.45 1.23 1.48 1.9
47
+ -5.6 7 4.56 10.1
48
+ 4.13 -15.6 8.7 0.0
49
+ 2.2 3.3 4.4 5.5
50
+ EOF
51
+ assert_equal m.matrix, [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]]
52
+ assert_equal m.name, nil
53
+ end
54
+
55
+ def test_input_positions_as_rows
56
+ m = PositionalMatrix.new <<-EOF
57
+ Testmatrix-2
58
+ 1.23 4.56 1.2 1.0 78 12.3
59
+ 1.45 1.23 1.48 1.9 10.1 12.0
60
+ -5.6 7 4.56 10.1 4 12
61
+ 4.13 -15.6 8.7 0.0 1.1 5
62
+ EOF
63
+ assert_equal m.matrix, [[1.23, 1.45, -5.6, 4.13], [4.56, 1.23, 7.0, -15.6], [1.2, 1.48, 4.56, 8.7], [1.0, 1.9, 10.1, 0.0],[78, 10.1, 4.0, 1.1], [12.3, 12.0, 12.0, 5.0]]
64
+ assert_equal m.name, 'Testmatrix-2'
65
+ end
66
+
67
+ def test_fails_on_nonnumeric_data
68
+ assert_raise ArgumentError do
69
+ m = PositionalMatrix.new <<-EOF
70
+ 1.23ss 4.56ww 1.2 1.0
71
+ 1.45zz 1.23 1.48 1.9
72
+ -5.6 7 4.56 10.1
73
+ 4.13 -15.6 8.7 0.0
74
+ 2.2 3.3 4.4 5.5
75
+ EOF
76
+ end
77
+ end
78
+
79
+ def test_fails_on_different_row_size
80
+ assert_raise ArgumentError do
81
+ m = PositionalMatrix.new <<-EOF
82
+ > Testmatrix_1
83
+ 1.23 4.56 1.2 1.0
84
+ 1.45 1.23 1.48
85
+ -5.6 7 4.56 10.1
86
+ 4.13 -15.6 8.7 0.0
87
+ 2.2 3.3 4.4 5.5
88
+ EOF
89
+ end
90
+ end
91
+
92
+ def test_fails_if_either_row_nor_col_has_size_4
93
+ assert_raise ArgumentError do
94
+ m = PositionalMatrix.new <<-EOF
95
+ 1 2 3 4 5
96
+ 2 2 -2 2 2
97
+ 3 3 3 3 3
98
+ 4 -4 4 -4 -4
99
+ 5 5 -5 -5 5
100
+ EOF
101
+ end
102
+ end
103
+
104
+ def test_to_s
105
+ m = PositionalMatrix.new <<-EOF
106
+ > Testmatrix_1
107
+ 1.23 4.56 1.2 1.0
108
+ 1.45 1.23 1.48 1.9
109
+ -5.6 7 4.56 10.1
110
+ 4.13 -15.6 8.7 0.0
111
+ 2.2 3.3 4.4 5.5
112
+ EOF
113
+ assert_equal m.to_s, "Testmatrix_1\n1.23\t4.56\t1.2\t1.0\n1.45\t1.23\t1.48\t1.9\n-5.6\t7.0\t4.56\t10.1\n4.13\t-15.6\t8.7\t0.0\n2.2\t3.3\t4.4\t5.5"
114
+ assert_equal m.to_s(false), "1.23\t4.56\t1.2\t1.0\n1.45\t1.23\t1.48\t1.9\n-5.6\t7.0\t4.56\t10.1\n4.13\t-15.6\t8.7\t0.0\n2.2\t3.3\t4.4\t5.5"
115
+ end
116
+
117
+ def test_pretty_string
118
+ m = PositionalMatrix.new <<-EOF
119
+ > Testmatrix_1
120
+ 1.23 4.56 1.2 1.0
121
+ 1.45 1.23 1.48 1.9
122
+ -5.6 7 4.56 10.1
123
+ 4.13 -15.6 8.7 0.0
124
+ 2.2 3.3 4.4 5.5
125
+ EOF
126
+ assert_equal m.pretty_string, "Testmatrix_1\n A C G T \n 1.23 4.56 1.2 1.0\n 1.45 1.23 1.48 1.9\n -5.6 7.0 4.56 10.1\n 4.13 -15.6 8.7 0.0\n 2.2 3.3 4.4 5.5"
127
+ assert_equal m.pretty_string(false), " A C G T \n 1.23 4.56 1.2 1.0\n 1.45 1.23 1.48 1.9\n -5.6 7.0 4.56 10.1\n 4.13 -15.6 8.7 0.0\n 2.2 3.3 4.4 5.5"
128
+ end
129
+
130
+ def test_to_hash
131
+ m = PositionalMatrix.new <<-EOF
132
+ > Testmatrix_1
133
+ 1.23 4.56 1.2 1.0
134
+ 1.45 1.23 1.48 1.9
135
+ -5.6 7 4.56 10.1
136
+ 4.13 -15.6 8.7 0.0
137
+ 2.2 3.3 4.4 5.5
138
+ EOF
139
+ assert_equal m.to_hash, {A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7.0, -15.6, 3.3], G:[1.2, 1.48, 4.56, 8.7, 4.4], T:[1.0, 1.9, 10.1, 0.0, 5.5]}.with_indifferent_access
140
+ end
141
+
142
+ def test_hash_input
143
+ m = PositionalMatrix.new(A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7, -15.6, 3.3],'G' => [1.2, 1.48, 4.56, 8.7, 4.4], 'T'=>[1.0, 1.9, 10.1, 0.0, 5.5])
144
+ assert_equal m.matrix, [[1.23, 4.56, 1.2, 1.0], [1.45, 1.23, 1.48, 1.9], [-5.6, 7.0, 4.56, 10.1], [4.13, -15.6, 8.7, 0.0],[2.2, 3.3, 4.4, 5.5]]
145
+ end
146
+
147
+ def test_size
148
+ m = PositionalMatrix.new(A: [1.23, 1.45, -5.6, 4.13, 2.2], C:[4.56, 1.23, 7, -15.6, 3.3],'G' => [1.2, 1.48, 4.56, 8.7, 4.4], 'T'=>[1.0, 1.9, 10.1, 0.0, 5.5])
149
+ assert_equal m.size, 5
150
+ assert_equal m.length, m.size
151
+ end
152
+
153
+ def test_fantom_parser
154
+ input = <<-EOS
155
+ NA motif_CTNCAG
156
+ P0 A C G T
157
+ P1 0 1878368 0 0
158
+ P2 0 0 0 1878368
159
+ P3 469592 469592 469592 469592
160
+ P4 0 1878368 0 0
161
+ P5 1878368 0 0 0
162
+ P6 0 0 1878368 0
163
+ EOS
164
+ m = PositionalMatrix.new input, PositionalMatrix::FantomParser
165
+ assert_equal 'motif_CTNCAG', m.name
166
+ assert_equal [[0,1878368,0,0],[0,0,0,1878368],[469592,469592,469592,469592],[0,1878368,0,0],[1878368,0,0,0],[0,0,1878368,0]], m.matrix
167
+ end
168
+
169
+ end
data/spec/ptap_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'test/unit'
2
+ require 'bioinform/support/ptap'
3
+
4
# Tests for Object#ptap.
#
# The class has its own name: the pmap test file also defines
# TestEnumerablePmap, so the two would merge into one test case if both files
# were loaded into one process.
class TestObjectPtap < Test::Unit::TestCase
  def test_ptap
    assert_equal ['abc','def','ghi'], ['abc','','','def','ghi'].ptap('',&:delete)

    x = ['abc','','','def','ghi']
    assert_equal false, ['abc','def','ghi'].equal?(x.ptap('',&:delete))

    x = ['abc','','','def','ghi']
    assert_equal true, x.equal?(x.ptap('',&:delete))

    assert_equal ['abc','','','def','ghi'], ['abc','','','def','ghi'].ptap(&:to_s)
  end
end
data/spec/same_spec.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'test/unit'
2
+ require 'bioinform/support/same'
3
+
4
# Tests for Enumerable#same?, grouped by the way it is called.
class TestEnumerableSame < Test::Unit::TestCase
  def test_same_with_predicate_block
    assert_equal(true, [1,3,9,7].same?(&:even?))
    assert_equal(true, [4,8,2,2].same?(&:even?))
    assert_equal(false, [1,8,3,2].same?(&:even?))
  end

  def test_same_with_value_block
    assert_equal(true, %w{cat dog rat}.same?(&:length))
    assert_equal(false, %w{cat dog rabbit}.same?(&:length))
  end

  def test_same_without_block
    assert_equal(true, %w{cat cat cat}.same?)
    assert_equal(false, %w{cat dog rat}.same?)
  end

  def test_same_on_empty_collection
    assert_equal(true, [].same?(&:length))
    assert_equal(true, [].same?)
  end
end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bioinform
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ilya Vorontsov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: active_support
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 3.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 3.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2.0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2.0'
46
+ description: A bunch of useful classes for bioinformatics
47
+ email:
48
+ - prijutme4ty@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE
56
+ - README.md
57
+ - Rakefile
58
+ - bioinform.gemspec
59
+ - lib/bioinform.rb
60
+ - lib/bioinform/data_models.rb
61
+ - lib/bioinform/data_models/iupac.rb
62
+ - lib/bioinform/data_models/iupac_word.rb
63
+ - lib/bioinform/data_models/positional_count_matrix.rb
64
+ - lib/bioinform/data_models/positional_matrix.rb
65
+ - lib/bioinform/data_models/positional_probability_matrix.rb
66
+ - lib/bioinform/data_models/positional_weight_matrix.rb
67
+ - lib/bioinform/support.rb
68
+ - lib/bioinform/support/callable_symbol.rb
69
+ - lib/bioinform/support/curry_except_self.rb
70
+ - lib/bioinform/support/multiline_squish.rb
71
+ - lib/bioinform/support/pmap.rb
72
+ - lib/bioinform/support/ptap.rb
73
+ - lib/bioinform/support/same.rb
74
+ - lib/bioinform/support/yaml_dump_file.rb
75
+ - lib/bioinform/version.rb
76
+ - spec/callable_symbol_spec.rb
77
+ - spec/pmap_test.rb
78
+ - spec/positional_matrix_spec.rb
79
+ - spec/ptap_spec.rb
80
+ - spec/same_spec.rb
81
+ homepage: ''
82
+ licenses: []
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 1.8.24
102
+ signing_key:
103
+ specification_version: 3
104
+ summary: Classes for work with different input formats of positional matrices and
105
+ IUPAC-words and making simple transform and statistics with them. Also module includes
106
+ several useful extensions for Enumerable module like parametric map and callable
107
+ symbols
108
+ test_files:
109
+ - spec/callable_symbol_spec.rb
110
+ - spec/pmap_test.rb
111
+ - spec/positional_matrix_spec.rb
112
+ - spec/ptap_spec.rb
113
+ - spec/same_spec.rb