RubyGems - bioinform - Versions diffs - 0.1.7 → 0.1.8 - Mend

bioinform 0.1.7 → 0.1.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (73)

data/TODO.txt +7 -2
data/bin/merge_into_collection +4 -0
data/bin/pcm2pwm +1 -1
data/bin/split_motifs +1 -1
data/bioinform.gemspec +2 -0
data/lib/bioinform/cli/merge_into_collection.rb +76 -0
data/lib/bioinform/cli/pcm2pwm.rb +20 -20
data/lib/bioinform/cli/split_motifs.rb +21 -20
data/lib/bioinform/cli.rb +16 -2
data/lib/bioinform/data_models/collection.rb +13 -10
data/lib/bioinform/data_models/pcm.rb +2 -2
data/lib/bioinform/data_models/pm.rb +24 -37
data/lib/bioinform/data_models/ppm.rb +2 -2
data/lib/bioinform/data_models/pwm.rb +2 -2
data/lib/bioinform/data_models.rb +8 -8
data/lib/bioinform/parsers/parser.rb +10 -5
data/lib/bioinform/parsers/splittable_parser.rb +57 -0
data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
data/lib/bioinform/parsers/string_parser.rb +5 -24
data/lib/bioinform/parsers/trivial_parser.rb +19 -3
data/lib/bioinform/parsers/yaml_parser.rb +35 -0
data/lib/bioinform/parsers.rb +6 -4
data/lib/bioinform/support/parameters.rb +19 -0
data/lib/bioinform/support/partial_sums.rb +1 -1
data/lib/bioinform/support.rb +11 -10
data/lib/bioinform/version.rb +1 -1
data/lib/bioinform.rb +5 -5
data/spec/cli/cli_spec.rb +8 -7
data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
data/spec/cli/data/split_motifs/collection.yaml +197 -0
data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
data/spec/cli/merge_into_collection_spec.rb +100 -0
data/spec/cli/pcm2pwm_spec.rb +3 -3
data/spec/cli/split_motifs_spec.rb +74 -3
data/spec/data_models/collection_spec.rb +2 -2
data/spec/data_models/pcm_spec.rb +2 -2
data/spec/data_models/pm_spec.rb +10 -27
data/spec/data_models/ppm_spec.rb +2 -2
data/spec/data_models/pwm_spec.rb +3 -3
data/spec/fabricators/collection_fabricator.rb +8 -0
data/spec/fabricators/pm_fabricator.rb +43 -0
data/spec/parsers/parser_spec.rb +29 -37
data/spec/parsers/string_fantom_parser_spec.rb +38 -35
data/spec/parsers/string_parser_spec.rb +33 -66
data/spec/parsers/trivial_parser_spec.rb +48 -6
data/spec/parsers/yaml_parser_spec.rb +50 -0
data/spec/spec_helper.rb +2 -6
data/spec/support/advanced_scan_spec.rb +2 -2
data/spec/support/array_product_spec.rb +2 -2
data/spec/support/array_zip_spec.rb +2 -2
data/spec/support/collect_hash_spec.rb +2 -2
data/spec/support/delete_many_spec.rb +2 -2
data/spec/support/inverf_spec.rb +2 -2
data/spec/support/multiline_squish_spec.rb +2 -2
data/spec/support/partial_sums_spec.rb +2 -2
data/spec/support/same_by_spec.rb +2 -2
metadata +86 -12

data/spec/parsers/parser_spec.rb CHANGED Viewed

@@ -1,11 +1,11 @@
-require 'spec_helper'
-require 'bioinform/parsers/parser'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/parsers/parser'
 module Bioinform
   describe Parser do
     context '#initialize' do
       it 'should accept an array correctly' do
-        Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
+        Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
       end
       it 'should treat several arguments as an array composed of them' do
         Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
@@ -21,6 +21,7 @@ module Bioinform
         expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
       end
     end
     context '::parse' do
       it 'should behave like Parser.new(input).parse!' do
         Parser.parse([1,2,3,4],[5,6,7,8]).should  == Parser.new([1,2,3,4],[5,6,7,8]).parse
@@ -37,9 +38,13 @@ module Bioinform
         Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
         Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
       end
     end
+    context '::split_on_motifs' do
+      it 'should be able to get a single PM' do
+        Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
+      end
+    end
     context '::normalize_hash_keys' do
       it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
@@ -97,56 +102,43 @@ module Bioinform
     end
     good_cases = {
-      'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
-                        matrix: [[0,1,2,3],[10,11,12,13]] },
+      'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
+                      result: Fabricate(:pm_unnamed) },
-      'Array 4xN' => {input: [[0,10],[1,11],[2,12],[3,13]],
-                        matrix: [[0,1,2,3],[10,11,12,13]] },
+      'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
+                      result: Fabricate(:pm_unnamed) },
-      'Hash A,C,G,T => Arrays' => { input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13]},
-                                    matrix: [[0,1,2,3],[10,11,12,13]] },
+      'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
+                                    result: Fabricate(:pm_unnamed) },
-      'Hash array of hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13}],
-                                  matrix: [[0,1,2,3],[10,11,12,13]] },
+      'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
+                                  result: Fabricate(:pm_unnamed) },
-      'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]],
-                                                                                  matrix: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]] },
+      'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
+                                                                                  result: Fabricate(:pm_4x4_unnamed) },
-      'Hash A,C,G,T => 4-Arrays' => { input: {:A => [0,10,100,1000], :c => [1,11,101,1001],'g' => [2,12,102,1002],'T' => [3,13,103,1003]},
-                                      matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] },
+      'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
+                                      result: Fabricate(:pm_4x4_unnamed) },
-      '4-Arrays of A,C,G,T hashes' => { input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
-                                                {:A => 10, :c => 11, 'g' => 12, 'T' => 13},
-                                                {:A => 100, :c => 101, 'g' => 102, 'T' => 103},
-                                                {:A => 1000, :c => 1001, 'g' => 1002, 'T' => 1003}],
-                                        matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] }
+      '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
+                                                {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
+                                                {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
+                                                {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
+                                        result: Fabricate(:pm_4x4_unnamed) }
     }
     bad_cases = {
       'Nil object on input' => {input: nil},
       'Empty array on input' => {input: []},
       'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
       'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
       'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
-      'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
-                                                     {:A => 10, :c => 11, 'g' => 12}] },
-      'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
-                                                  {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
-      'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
-                                                     {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
+      'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
+      'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
+      'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
       'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
       'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
       'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
       'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
     }

data/spec/parsers/string_fantom_parser_spec.rb CHANGED Viewed

@@ -1,16 +1,16 @@
-require 'spec_helper'
-require 'bioinform/parsers/string_fantom_parser'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/parsers/string_fantom_parser'
 module Bioinform
   describe StringFantomParser do
-    describe '#parse' do
+    describe '#split_on_motifs' do
       it 'should be able to parse several motifs' do
-        input = <<-EOS
+        input = "
 //
 NA  motif_1
 P0	A	C	G	T
-P1	0	1	2	3
-P2	4	5	6	7
+P1	0 1	2	3
+P2	4 5	6	7
 //
 //
 NA  motif_2
@@ -22,41 +22,44 @@ P3  9 10 11 12
 NA  motif_3
 P0	A	C	G	T
 P1	2 3	4 5
-P2	6 7 8 9
-        EOS
-        StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
-                                                    {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
-                                                    {matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
-      end
-      it 'should be able to parse motif with additional rows' do
-        input = <<-EOS
-NA  motif_1
-P0	A C G T S P
-P1	0 1 2 3 5 10
-P2	4 5 6 7 5 11
-        EOS
-        StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
+P2	6 7 8 9"
+        StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
       end
     end
     good_cases = {
       'string in Fantom-format' => {input: "
-        NA  motif_CTNCAG
+        NA  PM_name
+        P0	A	C	G	T
+        P1	1	2	3 4
+        P2	5	6	7 8",
+        result: Fabricate(:pm)
+      },
+      'motif with additional rows' => {input: "
+        NA  PM_name
+        P0	A C G T S P
+        P1	1 2 3 4 5 10
+        P2	5 6 7 8 5 11",
+        result: Fabricate(:pm)
+      },
+      'string with more than 10 positions(2-digit row numbers)' => {input: "
+        NA  PM_name
         P0	A	C	G	T
-        P1	0	1878368	0	0
-        P2	0	0	0	1878368
-        P3	469592	469592	469592	469592
-        P4	0	1878368	0	0
-        P5	1878368	0	0	0
-        P6	0	0	1878368	0",
-        matrix: [ [0.0, 1878368.0, 0.0, 0.0],
-                  [0.0, 0.0, 0.0, 1878368.0],
-                  [469592.0, 469592.0, 469592.0, 469592.0],
-                  [0.0, 1878368.0, 0.0, 0.0],
-                  [1878368.0, 0.0, 0.0, 0.0],
-                  [0.0, 0.0, 1878368.0, 0.0]],
-        name: 'motif_CTNCAG'
+        P1	1	2	3	4
+        P2	5	6	7	8
+        P3	1	2	3	4
+        P4	5	6	7	8
+        P5	1	2	3	4
+        P6	5	6	7	8
+        P7	1	2	3	4
+        P8	5	6	7	8
+        P9	1	2	3	4
+        P10	5	6	7	8
+        P11	1	2	3	4
+        P12	5	6	7	8",
+        result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
       }
     }

data/spec/parsers/string_parser_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/parsers/string_parser'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/parsers/string_parser'
 module Bioinform
   describe StringParser do
@@ -7,37 +7,35 @@ module Bioinform
     describe '#each' do
       it 'should yield consequent results of #parse! while it returns result' do
         parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
-        expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
+        expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
+                                                             OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
+                                                             OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
       end
       it 'should restart parser from the beginning each time' do
         parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
         3.times do
-          expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
+          expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
+                                                               OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
+                                                               OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
         end
       end
     end
-    context '::split' do
+    context '::split_on_motifs' do
       it 'should be able to get a single PM' do
-        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
+        StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
       end
       it 'should be able to split several PMs separated with an empty line' do
-        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
+        StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
+                                                                [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
       end
       it 'should be able to split several PMs separated with name' do
-        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
-        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
+        StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
+                                                                [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
       end
-    end
-    context '::split_on_motifs' do
-      it 'should be able to split string into PMs' do
-        result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
-        result.map{|pm| pm.matrix}.should == [  [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]]  ]
-        result.map{|pm| pm.name}.should == [nil, 'Name']
+      it 'should be able to split several PMs separated with both name and empty line' do
+        StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
+                                                                [ Fabricate(:pm_first), Fabricate(:pm_second) ]
       end
       it 'should create PMs by default' do
         result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
@@ -50,54 +48,23 @@ module Bioinform
     end
     good_cases = {
-      'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
-                            matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
-      'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
-                                                      matrix: [[1,2,3,4],[5,6,7,8]],
-                                                      name: 'TestMatrix' },
+      'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",  result: Fabricate(:pm_unnamed) },
+      '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",  result: Fabricate(:pm_unnamed) },
+      'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8",  result: Fabricate(:pm) },
+      'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8",  result: Fabricate(:pm) },
       'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
-                            matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
-      'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
-                                      matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
-      'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
-                                  matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
-      'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6   7 8",
-                                                matrix: [[1,2,3,4],[5,6,7,8]] },
-      'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8  \n\t\n",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
-      'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
-                      matrix: [[1,2,3,4],[5,6,7,8]] },
-      '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
-                      matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
-      '4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
-                      matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
-      '4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
-                      matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
+                                                       result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
+      'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",  result: Fabricate(:pm_with_floats) },
+      'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",  result: Fabricate(:pm_with_floats) },
+      'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6   7 8",  result: Fabricate(:pm_unnamed) },
+      'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8  \n\t\n",  result: Fabricate(:pm_unnamed) },
+      'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",  result: Fabricate(:pm_unnamed) },
+      'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",  result: Fabricate(:pm_unnamed) },
+      'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8",  result: Fabricate(:pm)},
+      'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",  result: Fabricate(:pm_unnamed) },
+      '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16",  result: Fabricate(:pm_4x4_unnamed) },
+      '4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16",  result: Fabricate(:pm_4x4_unnamed) },
+      '4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16",  result: Fabricate(:pm_4x4) }
     }
     bad_cases = {

data/spec/parsers/trivial_parser_spec.rb CHANGED Viewed

@@ -1,5 +1,6 @@
-require 'spec_helper'
-require 'bioinform/parsers/parser'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/parsers/parser'
+require_relative '../../lib/bioinform/data_models/collection'
 module Bioinform
   describe TrivialParser do
@@ -8,15 +9,56 @@ module Bioinform
         TrivialParser.instance_method(:initialize).arity.should == 1
       end
     end
-    context '#parser!' do
-      it 'should return input of that was passed to initialize' do
-        TrivialParser.new('stub input').parse!.should == 'stub input'
+    context '#parse!' do
+      it 'should return OpenStruct based on input of that was passed to initialize when input is a Hash' do
+        TrivialParser.new(matrix: 'stub matrix', name: 'stub name').parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
+      end
+      it 'should return OpenStruct based on input of that was passed to initialize when input is a OpenStruct' do
+        TrivialParser.new(OpenStruct.new(matrix: 'stub matrix', name: 'stub name')).parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
       end
     end
+    context '::split_on_motifs' do
+      it 'should be able to get a single PM' do
+        TrivialParser.split_on_motifs({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:'Name') ]
+      end
+    end
     it 'can be used to create PM with {matrix: ..., name: ...} form' do
       pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
       pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
       pm.name.should == 'Name'
     end
+    it 'can be used to create PM from PM (make copy)' do
+      pm = Fabricate(:pm)
+      pm_copy = PM.new(pm, TrivialParser)
+      pm_copy.should == pm
+    end
+  end
+  describe TrivialCollectionParser do
+    before :each do
+      @pm_1 = Fabricate(:pm_first)
+      @pm_2 = Fabricate(:pm_second)
+      @collection = Fabricate(:two_elements_collection)
+    end
+    describe '#parse!' do
+      it 'can be used to obtain PMs from Collection' do
+        @parser = TrivialCollectionParser.new(@collection)
+        @parser.parse!.should == @pm_1
+        @parser.parse!.should == @pm_2
+        expect{ @parser.parse! }.to raise_error
+      end
+    end
+    describe '::split_on_motifs' do
+      it 'should be able to split collection into PMs' do
+        TrivialCollectionParser.split_on_motifs(@collection).should == [@pm_1, @pm_2]
+      end
+    end
   end
-end
+end

data/spec/parsers/yaml_parser_spec.rb ADDED Viewed

@@ -0,0 +1,50 @@
+require 'yaml'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/parsers/yaml_parser'
+require_relative '../../lib/bioinform/data_models/collection'
+module Bioinform
+  describe YAMLParser do
+    context '#parse!' do
+      it 'should return PM that was encoded in YAML format' do
+        pm = Fabricate(:pm)
+        parser = YAMLParser.new(pm.to_yaml)
+        parser.parse!.should == pm
+      end
+    end
+    it 'can be used to create PM from yaml-string' do
+      pm = Fabricate(:pm)
+      pm_copy = PM.new(pm.to_yaml, YAMLParser)
+      pm_copy.should == pm
+    end
+    context '::split_on_motifs' do
+      it 'should be able to get a single PM' do
+        pm = Fabricate(:pm)
+        YAMLParser.split_on_motifs(pm.to_yaml, PM).should == [ pm ]
+      end
+    end
+  end
+  describe YAMLCollectionParser do
+    before :each do
+      @pm_1 = Fabricate(:pm_first)
+      @pm_2 = Fabricate(:pm_second)
+      @collection = Collection.new
+      @collection << @pm_1 << @pm_2
+    end
+    context '::split_on_motifs' do
+      it 'should be able to split collection into PMs' do
+        YAMLCollectionParser.split_on_motifs(@collection.to_yaml).should == [@pm_1, @pm_2]
+      end
+    end
+    context '#parse!' do
+      it 'should return PMs which were in encoded YAML format' do
+        @parser = YAMLCollectionParser.new(@collection.to_yaml)
+        @parser.parse!.should == @pm_1
+        @parser.parse!.should == @pm_2
+        expect{ @parser.parse! }.to raise_error
+      end
+    end
+  end
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -5,6 +5,7 @@ require 'rspec'
 require 'fileutils'
 require 'stringio'
+require 'fabrication'
 # from minitest
 def capture_io(&block)
@@ -39,12 +40,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
     good_cases.each do |case_description, input_and_result|
       it "should be able to parse #{case_description}" do
         result = parser_klass.new(input_and_result[:input]).parse
-        result[:matrix].should == input_and_result[:matrix]
-        if input_and_result.has_key?(:name)
-          result[:name].should == input_and_result[:name]
-        else
-          result[:name].should be_nil
-        end
+        Bioinform::PM.new(result).should == input_and_result[:result]
       end
     end

data/spec/support/advanced_scan_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/advanced_scan'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/advanced_scan'
 describe StringScanner do
   context '#advanced_scan' do

data/spec/support/array_product_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/array_product'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/array_product'
 describe Array do
   context '::product' do

data/spec/support/array_zip_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/array_zip'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/array_zip'
 describe Array do
   context '::zip' do

data/spec/support/collect_hash_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/collect_hash'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/collect_hash'
 describe Enumerable do
   # %w{A C G T}.collect_hash{|k| [k*2, k*3] }

data/spec/support/delete_many_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/delete_many'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/delete_many'
 describe Array do
   before :each do

data/spec/support/inverf_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/inverf'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/inverf'
 describe 'Math#inverf' do
   it 'should be erf(inverf(x)) == x' do

data/spec/support/multiline_squish_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/multiline_squish'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/multiline_squish'
 describe String do
   describe '#multiline_squish' do

data/spec/support/partial_sums_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/partial_sums'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/partial_sums'
 describe 'Array#partial_sums' do
   context 'when no initial value given' do

data/spec/support/same_by_spec.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require 'spec_helper'
-require 'bioinform/support/same_by'
+require_relative '../spec_helper'
+require_relative '../../lib/bioinform/support/same_by'
 describe Enumerable do
   describe '#same_by?' do