bioinform 0.1.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
data/spec/fabricators/collection_fabricator.rb
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
Fabricator(:three_elements_collection, class_name: Bioinform::Collection, aliases: [:pm_collection]) do
|
|
2
|
-
name 'PM_collection'
|
|
3
|
-
after_build{|collection| collection << Fabricate(:pm_1) << Fabricate(:pm_2) << Fabricate(:pm_3) }
|
|
4
|
-
end
|
|
5
|
-
|
|
6
|
-
Fabricator(:unnamed_pm_collection, from: :pm_collection) do
|
|
7
|
-
name nil
|
|
8
|
-
end
|
data/spec/fabricators/motif_fabricator.rb
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
Fabricator(:motif, class_name: Bioinform::Motif) do
|
|
2
|
-
end
|
|
3
|
-
|
|
4
|
-
Fabricator(:motif_with_name, from: :motif) do
|
|
5
|
-
name 'Motif name'
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
Fabricator(:motif_pcm, from: :motif) do
|
|
9
|
-
pcm(fabricator: :pcm)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
Fabricator(:motif_pwm, from: :motif) do
|
|
13
|
-
pwm(fabricator: :pwm)
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
Fabricator(:motif_ppm, from: :motif) do
|
|
17
|
-
ppm(fabricator: :ppm)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
Fabricator(:motif_pcm_and_ppm, from: :motif) do
|
|
21
|
-
pcm(fabricator: :pcm)
|
|
22
|
-
ppm(fabricator: :ppm)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
Fabricator(:motif_pwm_and_ppm, from: :motif) do
|
|
26
|
-
pwm(fabricator: :pwm)
|
|
27
|
-
ppm(fabricator: :ppm)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
Fabricator(:motif_pcm_and_pwm, from: :motif) do
|
|
31
|
-
pcm(fabricator: :pcm)
|
|
32
|
-
pwm(fabricator: :pwm)
|
|
33
|
-
end
|
data/spec/fabricators/pcm_fabricator.rb
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
Fabricator(:pcm, class_name: Bioinform::PCM) do
|
|
2
|
-
initialize_with{ Bioinform::PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'PCM_name') }
|
|
3
|
-
end
|
|
4
|
-
|
|
5
|
-
Fabricator(:pcm_with_floats, from: :pcm) do
|
|
6
|
-
matrix [[1, 2.3, 3.2, 1],[4.4, 0.1, 0.9, 2.1]]
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
Fabricator(:completely_different_pcm, from: :pcm) do
|
|
10
|
-
matrix [[101,207,138,248],[85,541,7,61]]
|
|
11
|
-
name 'PCM_another_name'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
Fabricator(:pcm_1, from: :pcm) do
|
|
15
|
-
matrix [[7,10,2,3],[4,5,6,7]]
|
|
16
|
-
name 'motif_1'
|
|
17
|
-
end
|
|
18
|
-
Fabricator(:pcm_2, from: :pcm) do
|
|
19
|
-
matrix [[5,7,4,6],[11,6,2,3],[10,3,3,6]]
|
|
20
|
-
name 'motif_2'
|
|
21
|
-
end
|
|
22
|
-
Fabricator(:pcm_3, from: :pcm) do
|
|
23
|
-
matrix [[3,4,1,14],[9,2,9,2]]
|
|
24
|
-
name 'motif_3'
|
|
25
|
-
end
|
data/spec/fabricators/pm_fabricator.rb
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
Fabricator(:pm, class_name: Bioinform::PM) do
|
|
2
|
-
initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
|
|
3
|
-
end
|
|
4
|
-
|
|
5
|
-
Fabricator(:pm_unnamed, from: :pm) do
|
|
6
|
-
name nil
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
Fabricator(:pm_first, from: :pm) do
|
|
11
|
-
name 'PM_first'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
Fabricator(:pm_second, from: :pm) do
|
|
15
|
-
matrix [[15,16,17,18],[11,21,31,41]]
|
|
16
|
-
name 'PM_second'
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
Fabricator(:pm_4x4, from: :pm) do
|
|
21
|
-
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
|
|
25
|
-
name nil
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
Fabricator(:pm_with_floats, from: :pm_unnamed) do
|
|
29
|
-
matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
Fabricator(:pm_1, from: :pm) do
|
|
33
|
-
matrix [[0,1,2,3],[4,5,6,7]]
|
|
34
|
-
name 'motif_1'
|
|
35
|
-
end
|
|
36
|
-
Fabricator(:pm_2, from: :pm) do
|
|
37
|
-
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
|
|
38
|
-
name 'motif_2'
|
|
39
|
-
end
|
|
40
|
-
Fabricator(:pm_3, from: :pm) do
|
|
41
|
-
matrix [[2,3,4,5],[6,7,8,9]]
|
|
42
|
-
name 'motif_3'
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
Fabricator(:pm_4,from: :pm) do
|
|
46
|
-
matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
|
|
47
|
-
name 'pm 4'
|
|
48
|
-
end
|
|
49
|
-
Fabricator(:pm_5, from: :pm) do
|
|
50
|
-
matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
|
|
51
|
-
name 'pm 5'
|
|
52
|
-
end
|
data/spec/fabricators/ppm_fabricator.rb
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
Fabricator(:ppm, class_name: Bioinform::PPM) do
|
|
2
|
-
initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
|
|
3
|
-
name 'PPM_name'
|
|
4
|
-
end
|
|
5
|
-
|
|
6
|
-
# It has the same name as original pcm because PCM#to_ppm doesn't change the name
|
|
7
|
-
Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
|
|
8
|
-
initialize_with{ Fabricate(:pcm).to_ppm }
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
|
|
12
|
-
# this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
|
|
13
|
-
matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
|
|
14
|
-
end
|
data/spec/fabricators/pwm_fabricator.rb
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
Fabricator(:pwm, class_name: Bioinform::PWM) do
|
|
2
|
-
initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
|
|
3
|
-
end
|
|
4
|
-
|
|
5
|
-
# It has name 'PCM_name' because name isn't converted during #to_pwm
|
|
6
|
-
Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
|
|
7
|
-
initialize_with{ Fabricate(:pcm).to_pwm }
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
|
|
11
|
-
matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
|
|
15
|
-
matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
|
|
16
|
-
end
|
data/spec/parsers/parser_spec.rb
DELETED
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
require_relative '../spec_helper'
|
|
2
|
-
require_relative '../../lib/bioinform/parsers/parser'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
describe Parser do
|
|
6
|
-
context '#initialize' do
|
|
7
|
-
it 'should accept an array correctly' do
|
|
8
|
-
Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
|
9
|
-
end
|
|
10
|
-
it 'should treat several arguments as an array composed of them' do
|
|
11
|
-
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
|
12
|
-
end
|
|
13
|
-
it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
|
|
14
|
-
Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
|
|
15
|
-
end
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
context '::parse!' do
|
|
19
|
-
it 'should behave like Parser.new(input).parse!' do
|
|
20
|
-
Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
|
|
21
|
-
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
context '::parse' do
|
|
26
|
-
it 'should behave like Parser.new(input).parse!' do
|
|
27
|
-
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
|
28
|
-
Parser.parse([1,2,3],[4,5,6]).should be_nil
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
context '::choose' do
|
|
33
|
-
it 'should create parser of appropriate type' do
|
|
34
|
-
Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
|
|
35
|
-
Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
|
|
36
|
-
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
|
|
37
|
-
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
|
|
38
|
-
Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
|
|
39
|
-
Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
context '::split_on_motifs' do
|
|
44
|
-
it 'should be able to get a single PM' do
|
|
45
|
-
Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
context '::normalize_hash_keys' do
|
|
50
|
-
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
|
51
|
-
Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
context '::need_transpose?' do
|
|
56
|
-
it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
|
|
57
|
-
Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
|
|
58
|
-
Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
context '::array_from_acgt_hash' do
|
|
62
|
-
it 'should convert hash of arrays to a transposed array of arrays' do
|
|
63
|
-
input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
|
|
64
|
-
Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
|
|
65
|
-
end
|
|
66
|
-
it 'should convert hash of numbers to an array of numbers' do
|
|
67
|
-
input = {A: 1, C: 2, G: 3, T: 4}
|
|
68
|
-
Parser.array_from_acgt_hash(input).should == [1,2,3,4]
|
|
69
|
-
end
|
|
70
|
-
it 'should process both symbolic and string keys, in both upcase and downcase' do
|
|
71
|
-
input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
|
|
72
|
-
input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
|
|
73
|
-
Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
context '::try_convert_to_array' do
|
|
78
|
-
it 'should not change array' do
|
|
79
|
-
inputs = []
|
|
80
|
-
inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
|
|
81
|
-
inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
|
|
82
|
-
inputs.each do |input|
|
|
83
|
-
Parser.try_convert_to_array( input ).should == input
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
it 'should convert ACGT-Hashes to an array of positions (not letters)' do
|
|
87
|
-
Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
context '#parse' do
|
|
92
|
-
it 'should give the same result as #parse!' do
|
|
93
|
-
parser = Parser.new('stub parser')
|
|
94
|
-
parser.stub(:parse!).and_return('stub result')
|
|
95
|
-
parser.parse.should == 'stub result'
|
|
96
|
-
end
|
|
97
|
-
it 'should return nil if #parse! raised an exception' do
|
|
98
|
-
parser = Parser.new('stub parser')
|
|
99
|
-
parser.stub(:parse!).and_raise
|
|
100
|
-
parser.parse.should be_nil
|
|
101
|
-
end
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
good_cases = {
|
|
105
|
-
'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
|
|
106
|
-
result: Fabricate(:pm_unnamed) },
|
|
107
|
-
|
|
108
|
-
'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
|
|
109
|
-
result: Fabricate(:pm_unnamed) },
|
|
110
|
-
|
|
111
|
-
'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
|
|
112
|
-
result: Fabricate(:pm_unnamed) },
|
|
113
|
-
|
|
114
|
-
'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
|
|
115
|
-
result: Fabricate(:pm_unnamed) },
|
|
116
|
-
|
|
117
|
-
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
|
|
118
|
-
result: Fabricate(:pm_4x4_unnamed) },
|
|
119
|
-
|
|
120
|
-
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
|
|
121
|
-
result: Fabricate(:pm_4x4_unnamed) },
|
|
122
|
-
|
|
123
|
-
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
|
|
124
|
-
{:A => 5, :c => 6, 'g' => 7, 'T' => 8},
|
|
125
|
-
{:A => 9, :c => 10, 'g' => 11, 'T' => 12},
|
|
126
|
-
{:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
|
|
127
|
-
result: Fabricate(:pm_4x4_unnamed) }
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
bad_cases = {
|
|
131
|
-
'Nil object on input' => {input: nil},
|
|
132
|
-
'Empty array on input' => {input: []},
|
|
133
|
-
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
|
134
|
-
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
|
135
|
-
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
|
136
|
-
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
|
|
137
|
-
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
|
138
|
-
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
|
139
|
-
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
|
140
|
-
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
|
141
|
-
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
|
142
|
-
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
parser_specs(Parser, good_cases, bad_cases)
|
|
146
|
-
context '#parser!' do
|
|
147
|
-
it "should raise an exception on parsing empty list to parser" do
|
|
148
|
-
expect{ Parser.new().parse! }.to raise_error
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
end
|
data/spec/parsers/string_fantom_parser_spec.rb
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
require_relative '../spec_helper'
|
|
2
|
-
require_relative '../../lib/bioinform/parsers/string_fantom_parser'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
describe StringFantomParser do
|
|
6
|
-
describe '#split_on_motifs' do
|
|
7
|
-
it 'should be able to parse several motifs' do
|
|
8
|
-
input = "
|
|
9
|
-
//
|
|
10
|
-
NA motif_1
|
|
11
|
-
P0 A C G T
|
|
12
|
-
P1 0 1 2 3
|
|
13
|
-
P2 4 5 6 7
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
NA motif_2
|
|
17
|
-
P0 A C G T
|
|
18
|
-
P1 1 2 3 4
|
|
19
|
-
P2 5 6 7 8
|
|
20
|
-
P3 9 10 11 12
|
|
21
|
-
//
|
|
22
|
-
NA motif_3
|
|
23
|
-
P0 A C G T
|
|
24
|
-
P1 2 3 4 5
|
|
25
|
-
P2 6 7 8 9"
|
|
26
|
-
StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
good_cases = {
|
|
31
|
-
'string in Fantom-format' => {input: "
|
|
32
|
-
NA PM_name
|
|
33
|
-
P0 A C G T
|
|
34
|
-
P1 1 2 3 4
|
|
35
|
-
P2 5 6 7 8",
|
|
36
|
-
result: Fabricate(:pm)
|
|
37
|
-
},
|
|
38
|
-
|
|
39
|
-
'motif with additional rows' => {input: "
|
|
40
|
-
NA PM_name
|
|
41
|
-
P0 A C G T S P
|
|
42
|
-
P1 1 2 3 4 5 10
|
|
43
|
-
P2 5 6 7 8 5 11",
|
|
44
|
-
result: Fabricate(:pm)
|
|
45
|
-
},
|
|
46
|
-
|
|
47
|
-
'string with more than 10 positions(2-digit row numbers)' => {input: "
|
|
48
|
-
NA PM_name
|
|
49
|
-
P0 A C G T
|
|
50
|
-
P1 1 2 3 4
|
|
51
|
-
P2 5 6 7 8
|
|
52
|
-
P3 1 2 3 4
|
|
53
|
-
P4 5 6 7 8
|
|
54
|
-
P5 1 2 3 4
|
|
55
|
-
P6 5 6 7 8
|
|
56
|
-
P7 1 2 3 4
|
|
57
|
-
P8 5 6 7 8
|
|
58
|
-
P9 1 2 3 4
|
|
59
|
-
P10 5 6 7 8
|
|
60
|
-
P11 1 2 3 4
|
|
61
|
-
P12 5 6 7 8",
|
|
62
|
-
result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
bad_cases = { }
|
|
67
|
-
|
|
68
|
-
parser_specs(StringFantomParser, good_cases, bad_cases)
|
|
69
|
-
end
|
|
70
|
-
end
|
data/spec/parsers/string_parser_spec.rb
DELETED
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
require_relative '../spec_helper'
|
|
2
|
-
require_relative '../../lib/bioinform/parsers/string_parser'
|
|
3
|
-
|
|
4
|
-
module Bioinform
|
|
5
|
-
describe StringParser do
|
|
6
|
-
|
|
7
|
-
describe '#each' do
|
|
8
|
-
it 'should yield consequent results of #parse! while it returns result' do
|
|
9
|
-
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
|
10
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
|
11
|
-
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
|
12
|
-
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
|
13
|
-
end
|
|
14
|
-
it 'should restart parser from the beginning each time' do
|
|
15
|
-
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
|
16
|
-
3.times do
|
|
17
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
|
18
|
-
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
|
19
|
-
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
context '::split_on_motifs' do
|
|
25
|
-
it 'should be able to get a single PM' do
|
|
26
|
-
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
|
|
27
|
-
end
|
|
28
|
-
it 'should be able to split several PMs separated with an empty line' do
|
|
29
|
-
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
|
|
30
|
-
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
|
|
31
|
-
end
|
|
32
|
-
it 'should be able to split several PMs separated with name' do
|
|
33
|
-
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
|
|
34
|
-
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
|
|
35
|
-
end
|
|
36
|
-
it 'should be able to split several PMs separated with both name and empty line' do
|
|
37
|
-
StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
|
|
38
|
-
[ Fabricate(:pm_first), Fabricate(:pm_second) ]
|
|
39
|
-
end
|
|
40
|
-
it 'should create PMs by default' do
|
|
41
|
-
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
|
42
|
-
result.each{|pm| pm.class.should == PM}
|
|
43
|
-
end
|
|
44
|
-
it 'should create PM subclass when it\'s specified' do
|
|
45
|
-
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
|
|
46
|
-
result.each{|pm| pm.class.should == PWM}
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
good_cases = {
|
|
51
|
-
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
|
52
|
-
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
|
|
53
|
-
'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
|
54
|
-
'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
|
55
|
-
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
|
56
|
-
result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
|
|
57
|
-
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
|
|
58
|
-
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
|
|
59
|
-
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
|
|
60
|
-
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
|
|
61
|
-
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
|
62
|
-
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
|
63
|
-
'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
|
|
64
|
-
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
|
|
65
|
-
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
|
|
66
|
-
'4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
|
|
67
|
-
'4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
bad_cases = {
|
|
71
|
-
'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
|
|
72
|
-
'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
parser_specs(StringParser, good_cases, bad_cases)
|
|
76
|
-
end
|
|
77
|
-
end
|