mgnu 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +0 -0
  3. data/README.md +31 -0
  4. data/Rakefile +33 -0
  5. data/lib/mgnu.rb +9 -0
  6. data/lib/mgnu/alignment.rb +143 -0
  7. data/lib/mgnu/common.rb +68 -0
  8. data/lib/mgnu/genbank.rb +117 -0
  9. data/lib/mgnu/genbank/feature.rb +84 -0
  10. data/lib/mgnu/genbank/location.rb +150 -0
  11. data/lib/mgnu/genbank/qualifier.rb +45 -0
  12. data/lib/mgnu/genbank/reference.rb +114 -0
  13. data/lib/mgnu/genbank/source.rb +39 -0
  14. data/lib/mgnu/loggable.rb +61 -0
  15. data/lib/mgnu/parser.rb +50 -0
  16. data/lib/mgnu/parser/blast.rb +87 -0
  17. data/lib/mgnu/parser/blast/format0.rb +290 -0
  18. data/lib/mgnu/parser/blast/format7.rb +121 -0
  19. data/lib/mgnu/parser/blast/format8.rb +120 -0
  20. data/lib/mgnu/parser/blast/hsp.rb +75 -0
  21. data/lib/mgnu/parser/blast/query.rb +45 -0
  22. data/lib/mgnu/parser/blast/sbjct.rb +62 -0
  23. data/lib/mgnu/parser/clustalw.rb +72 -0
  24. data/lib/mgnu/parser/fasta.rb +61 -0
  25. data/lib/mgnu/parser/fasta_header_index.rb +39 -0
  26. data/lib/mgnu/parser/fasta_index.rb +57 -0
  27. data/lib/mgnu/parser/fastq.rb +61 -0
  28. data/lib/mgnu/parser/genbank.rb +187 -0
  29. data/lib/mgnu/parser/gff.rb +56 -0
  30. data/lib/mgnu/parser/iprscan/hit.rb +76 -0
  31. data/lib/mgnu/parser/iprscan_file.rb +39 -0
  32. data/lib/mgnu/parser/kegg_ontology_index.rb +163 -0
  33. data/lib/mgnu/parser/pilercr.rb +102 -0
  34. data/lib/mgnu/parser/prodigal.rb +170 -0
  35. data/lib/mgnu/parser/sam.rb +115 -0
  36. data/lib/mgnu/parser/sam/alignment.rb +22 -0
  37. data/lib/mgnu/parser/sam/header.rb +23 -0
  38. data/lib/mgnu/parser/sam/pair.rb +18 -0
  39. data/lib/mgnu/sequence.rb +207 -0
  40. data/lib/mgnu/sequence/fasta.rb +79 -0
  41. data/lib/mgnu/sequence/fastq.rb +43 -0
  42. data/lib/mgnu/version.rb +16 -0
  43. data/mgnu.gemspec +39 -0
  44. data/spec/mgnu/parser/blast_format0_spec.rb +114 -0
  45. data/spec/mgnu/parser/blast_format7_spec.rb +24 -0
  46. data/spec/mgnu/parser/blast_format8_spec.rb +26 -0
  47. data/spec/mgnu/parser/blast_multihsp_spec.rb +100 -0
  48. data/spec/mgnu/parser/blast_oof_spec.rb +53 -0
  49. data/spec/mgnu/parser/clustalw_spec.rb +90 -0
  50. data/spec/mgnu/parser/fasta_header_index_tc_parser_spec.rb +25 -0
  51. data/spec/mgnu/parser/fasta_index_tc_parser_spec.rb +25 -0
  52. data/spec/mgnu/parser/fasta_parser_spec.rb +53 -0
  53. data/spec/mgnu/parser_spec.rb +22 -0
  54. data/spec/mgnu/sequence/fasta_spec.rb +60 -0
  55. data/spec/mgnu/sequence/fastq_spec.rb +31 -0
  56. data/spec/mgnu/sequence_spec.rb +81 -0
  57. data/spec/mgnu_spec.rb +7 -0
  58. data/spec/spec_helper.rb +53 -0
  59. metadata +376 -0
@@ -0,0 +1,114 @@
require 'spec_helper'

# Spec for MgNu::Parser::Blast built from 'data/test.blast' with 0 as the
# second constructor argument (presumably the BLAST output format -- confirm
# against the parser). The nested groups walk from the parsed queries down to
# a single Hsp; every `before` hook runs again for each example.
describe 'MgNu::Parser::Blast format0' do
  before { @parser = MgNu::Parser::Blast.new('data/test.blast', 0) }

  it 'knows about attributes' do
    expect(@parser).to be_a(MgNu::Parser::Blast)
    expect(@parser).to respond_to(:parse)
  end

  describe 'after calling parse' do
    before { @results = @parser.parse }

    it 'should have the correct number of queries' do
      expect(@results.length).to eq(3)
    end

    it 'should contain the correct first query' do
      expect(@results[0].query_id).to eq('test')
    end

    it 'should have the correct number of sbjcts for query 1' do
      expect(@results[0].sbjcts.length).to eq(10)
    end

    it 'should have the correct number of sbjcts for query 2' do
      expect(@results[1].sbjcts.length).to eq(10)
    end

    describe 'the third query' do
      before { @third = @results[2] }

      it 'should have a correct database name' do
        expect(@third.database).to eq('NCBI Protein Reference Sequences')
      end

      it 'should respond to sbjcts' do
        expect(@third).to respond_to(:sbjcts)
        expect(@third.sbjcts).to be_a(Array)
        expect(@third.sbjcts.length).to eq(10)
      end

      describe 'the first sbjct of the third query' do
        before { @top_sbjct = @third.sbjcts[0] }

        it 'should have number 1' do
          expect(@top_sbjct.number).to eq(1)
        end

        it 'should have the correct sbjct name' do
          expect(@top_sbjct.sbjct_id).to eq('ref|ZP_02544218.1|')
        end

        it 'should have the correct length' do
          expect(@top_sbjct.length).to eq(121)
        end

        it 'should have a non-empty array of Hsps' do
          expect(@top_sbjct.hsps.length).to be > 0
        end

        it 'should respond to best_hsp and return an Hsp object' do
          # The responder check deliberately goes through the query again,
          # while the type check uses the sbjct captured in the hook.
          expect(@third.sbjcts[0]).to respond_to(:best_hsp)
          expect(@top_sbjct.best_hsp).to be_a(MgNu::Parser::Blast::Hsp)
        end

        describe 'the first hsp of the first sbjct of the third query' do
          before { @top_hsp = @top_sbjct.hsps[0] }

          it 'should have a known query sequence' do
            expect(@top_hsp.query_sequence).to eq(
              'MKKIL-ATIXSAALYGLP----AXVMAQGITDDLSNLGLNXFGNETNLGTNIALIGTIARIINILLGFLGVLAVILVLWGGFKWMTAAGDEAKIGEAKKLMGAGVIGLVIILAAFAIASFVVNQL'
            )
          end

          it 'should have a known midline' do
            # NOTE(review): this literal is shorter than the query and sbjct
            # literals in the neighbouring examples; runs of spaces may have
            # been collapsed somewhere upstream -- verify against the fixture.
            expect(@top_hsp.midline).to eq(
              'MKK L A + S + P A V A ++ S + GN T+L + I I+NILL G +AVI+++ GG +++ ++GD ++ AK + VIGL++++ A+AI +FVV +'
            )
          end

          it 'should have a known sbjct sequence' do
            expect(@top_hsp.sbjct_sequence).to eq(
              'MKKFLIAALVSLGIVVTPLAMDAPVFANAKSEVTSGVSSVNDGNSTDLPS------FITNIVNILLFLAGAVAVIVIIIGGIRYVMSSGDAGQVQSAKNTILYAVIGLIVVIMAYAIVNFVVTNV'
            )
          end

          it 'should have the correct bit score and raw score' do
            expect(@top_hsp.bit_score).to eq(62.0)
            expect(@top_hsp.score).to eq(149)
          end

          it 'should have a correct evalue' do
            expect(@top_hsp.evalue).to eq(1e-10)
          end

          it 'should have a non-nil query|sbjct_from and _to values' do
            expect(@top_hsp.query_from).not_to be_nil
            expect(@top_hsp.query_to).not_to be_nil
            expect(@top_hsp.sbjct_from).not_to be_nil
            expect(@top_hsp.sbjct_to).not_to be_nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
require 'spec_helper'

# Spec for MgNu::Parser::Blast built from 'data/test.blast.xml' with no
# explicit second constructor argument. Looks queries and sbjcts up by their
# `number` attribute instead of by position.
describe 'MgNu::Parser::Blast format7' do
  before do
    @results = MgNu::Parser::Blast.new('data/test.blast.xml').parse
  end

  it 'should correctly parse' do
    expect(@results.length).to eq(8)
  end

  it 'should report the correct information for queries' do
    # First query whose number is 10 (nil when none matches).
    tenth_query = @results.find { |query| query.number == 10 }
    expect(tenth_query).to be_a(MgNu::Parser::Blast::Query)
    expect(tenth_query.number).to eq(10)
    expect(tenth_query.sbjcts.length).to eq(7)

    # First sbjct of that query whose number is 6.
    sixth_sbjct = tenth_query.sbjcts.find { |sbjct| sbjct.number == 6 }
    expect(sixth_sbjct.sbjct_id).to eq('gi|31376410|gb|AC096051.7|')
    expect(sixth_sbjct.hsps.length).to eq(1)
    expect(sixth_sbjct.hsps[0].evalue).to eq(2.97608)
  end
end
@@ -0,0 +1,26 @@
require 'spec_helper'

# Spec for MgNu::Parser::Blast built from './data/test.blast.m8' with 8 as the
# second constructor argument (presumably the tabular BLAST format -- confirm
# against the parser).
describe 'MgNu::Parser::Blast format8' do
  # Setup only: expectations live in examples so that a failure is reported
  # once, against a named example, rather than as a hook error on every example.
  before do
    @report = MgNu::Parser::Blast.new('./data/test.blast.m8', 8)
    @queries = @report.parse
  end

  it 'knows about attributes' do
    expect(@report).to be_a(MgNu::Parser::Blast)
  end

  it 'should correctly parse' do
    expect(@queries.length).to eq(2)
  end

  it 'should report the correct information for queries' do
    expect(@queries[0].sbjcts.length).to eq(3)
    expect(@queries[1].sbjcts.length).to eq(3)

    # Hsp counts per sbjct of the first query.
    expect(@queries[0].sbjcts[0].hsps.length).to eq(3)
    expect(@queries[0].sbjcts[1].hsps.length).to eq(3)
    expect(@queries[0].sbjcts[2].hsps.length).to eq(3)

    # Hsp counts per sbjct of the second query.
    expect(@queries[1].sbjcts[0].hsps.length).to eq(8)
    expect(@queries[1].sbjcts[1].hsps.length).to eq(1)
    expect(@queries[1].sbjcts[2].hsps.length).to eq(1)
  end
end
@@ -0,0 +1,100 @@
require 'spec_helper'

# Spec for MgNu::Parser::Blast reading 'data/multihsp.blast' (second
# constructor argument 0). Checks the single query/sbjct pair and then the
# first and fourth Hsp of that sbjct.
describe 'MgNu::Parser::Blast parsing a multihsp report' do
  before do
    @parser = MgNu::Parser::Blast.new('data/multihsp.blast', 0)
    @results = @parser.parse
    @first_query = @results[0]
    @first_sbjct = @first_query.sbjcts[0]
  end

  it 'should have one query' do
    expect(@results.length).to eq(1)
  end

  it 'should have 1 sbjcts for query 1' do
    expect(@results[0].sbjcts.length).to eq(1)
  end

  it 'should have the correct sbjct_id for query 1' do
    expect(@first_sbjct.sbjct_id).to eq('Contig17100_1_5')
  end

  it 'should have the correct sbjct length' do
    expect(@first_sbjct.length).to eq(2190)
  end

  it 'should have a non-empty array of Hsps' do
    expect(@first_sbjct.hsps.length).to be > 0
  end

  describe 'Query 1, Sbjct 1, Hsp 1' do
    before { @alignment = @first_sbjct.hsps[0] }

    it 'should have a known query sequence' do
      expect(@alignment.query_sequence).to eq('gatcgacggcaagct')
    end

    it 'should have a known midline' do
      expect(@alignment.midline).to eq('|||||||||||||||')
    end

    it 'should have a known sbjct sequence' do
      expect(@alignment.sbjct_sequence).to eq('gatcgacggcaagct')
    end

    it 'should have the correct bit score and raw score' do
      expect(@alignment.bit_score).to eq(30.2)
      expect(@alignment.score).to eq(15)
    end

    it 'should have a correct evalue' do
      expect(@alignment.evalue).to eq(0.12)
    end
  end

  describe 'Query 1, Sbjct 1, Hsp 4' do
    # Index 3 is the fourth Hsp.
    before { @alignment = @first_sbjct.hsps[3] }

    it 'should have a known query sequence' do
      expect(@alignment.query_sequence).to eq('gatcgataaagtg')
    end

    it 'should have a known midline' do
      expect(@alignment.midline).to eq('|||||||||||||')
    end

    it 'should have a known sbjct sequence' do
      expect(@alignment.sbjct_sequence).to eq('gatcgataaagtg')
    end

    it 'should have the correct bit score and raw score' do
      expect(@alignment.bit_score).to eq(26.3)
      expect(@alignment.score).to eq(13)
    end

    it 'should have a correct evalue' do
      expect(@alignment.evalue).to eq(1.8)
    end

    it 'should have the correct query start/stop positions' do
      expect(@alignment.query_from).to eq(64175)
      expect(@alignment.query_to).to eq(64187)
    end

    it 'should have the correct sbjct start/stop positions' do
      expect(@alignment.sbjct_from).to eq(1566)
      expect(@alignment.sbjct_to).to eq(1578)
    end
  end
end
@@ -0,0 +1,53 @@
require 'spec_helper'

# Spec for MgNu::Parser::Blast reading 'data/test_blastx_oof.blast' (second
# constructor argument 0). Focuses on the '/' marker in the query sequence of
# the first Hsp and on the Hash returned by `query_frameshifts`.
describe 'MgNu::Parser::Blast parsing a oof report' do
  before do
    @parser = MgNu::Parser::Blast.new('data/test_blastx_oof.blast', 0)
    @results = @parser.parse
    @first_query = @results[0]
  end

  it 'should have 1 query' do
    expect(@results.length).to eq(1)
  end

  it 'should have 1 sbjcts for query 1' do
    expect(@first_query.sbjcts.length).to eq(1)
  end

  describe 'Query 1, Sbjct 1' do
    before { @first_sbjct = @first_query.sbjcts[0] }

    it 'should have the correct sbjct_id' do
      expect(@first_sbjct.sbjct_id).to eq('spe:Spro_0261')
    end

    it 'should have the correct length' do
      expect(@first_sbjct.length).to eq(729)
    end

    it 'should have a non-empty array of Hsps' do
      expect(@first_sbjct.hsps.length).to be > 0
    end

    describe 'Query 1, Sbjct 1, Hsp 1' do
      before { @alignment = @first_sbjct.hsps[0] }

      it 'should contain one frame-shift characters in the query sequence' do
        # Splitting on '/' must yield exactly two pieces.
        pieces = @alignment.query_sequence.split(%r{\/})
        expect(pieces.length).to be(2)
      end

      it 'should respond to query_frameshifts and return a hash with the correct key/value pair' do
        expect(@alignment).to respond_to(:query_frameshifts)

        shifts = @alignment.query_frameshifts
        expect(shifts).to be_a(Hash)

        # The remaining checks re-read the attribute each time, one call per
        # expectation.
        expect(@alignment.query_frameshifts.keys.length).to eq(1)
        expect(@alignment.query_frameshifts.keys[0]).to eq(576)
        expect(@alignment.query_frameshifts.values[0]).to eq(1)
      end
    end
  end
end
@@ -0,0 +1,90 @@
require 'spec_helper'

# Spec for MgNu::Parser::ClustalW built from 'data/clustalw_test.aln', and for
# the MgNu::Alignment object it exposes through `alignment`.
describe MgNu::Parser::ClustalW do
  before { @parser = MgNu::Parser::ClustalW.new('data/clustalw_test.aln') }

  it 'should be a correct object' do
    expect(@parser).to be_a(MgNu::Parser::ClustalW)
    expect(@parser).to respond_to(:file)
    expect(@parser).to respond_to(:buffer)
    expect(@parser).to respond_to(:alignment)
  end

  it 'should be parsed after creation' do
    expect(@parser.buffer.length).to be > 0
  end

  it 'return a MgNu::Alignment' do
    expect(@parser.alignment).to be_a(MgNu::Alignment)
  end

  describe 'MgNu::Alignment' do
    before { @aln = @parser.alignment }

    it 'should act like an array' do
      seen = 0
      @aln.each do |row|
        seen += 1
        expect(row).to be_a(String)
      end
      expect(seen).to eq(7)
    end

    it 'can iterate by column' do
      columns = 0
      @aln.each_position { columns += 1 }
      expect(columns).to eq(@aln.length)
      # Called without a block here; the result is expected to be an Array.
      expect(@aln.each_position).to be_a(Array)
    end

    it 'can iterate of a range of columns' do
      columns = 0
      @aln.each_position(10..20) { columns += 1 }
      # One yield is expected per integer in the range.
      expect(columns).to eq((10..20).to_a.length)
    end

    it 'can fetch a single position in in the alignment' do
      column = nil
      @aln.each_position(3) { |pos| column = pos }
      expect(column).to be_a(Array)
      expect(column[0]).to eq('K')
      expect(column[1]).to eq('N')
    end

    it 'should behave like an enumerable' do
      expect(@aln[10..20]).to be_a(Array)

      # A single index returns an Array whose first element holds the column.
      column = @aln[3]
      expect(column).to be_a(Array)
      expect(column[0][0]).to eq('K')
      expect(column[0][1]).to eq('N')
    end

    it 'reports the correct match length' do
      expect(@aln.match.length).to eq(@aln.length)
    end

    it 'reports correct match data using a range operator' do
      expect(@aln.match(107..112)).to eq('. : .*')
    end

    it 'reports single position match values correctly' do
      expect(@aln.match(3)).to eq(' ')
      expect(@aln.match(107)).to eq('.')
      expect(@aln.match(108)).to eq(' ')
      expect(@aln.match(109)).to eq(':')
      expect(@aln.match(112)).to eq('*')
    end
  end
end
@@ -0,0 +1,25 @@
require 'spec_helper'

# Spec for MgNu::Parser::FastaHeaderIndex built from 'data/test.fasta'. The
# examples expect 'data/test.fasta.hdr.tch' to exist once the index has been
# constructed; teardown removes that file again.
describe MgNu::Parser::FastaHeaderIndex do
  let(:index_path) { 'data/test.fasta.hdr.tch' }

  before do
    @ff = MgNu::Parser::FastaHeaderIndex.new('data/test.fasta')
  end

  # A single teardown hook. RSpec runs multiple `after` hooks in reverse
  # declaration order, so keeping "close" and "delete" in separate hooks made
  # the file deletion run before the index was closed. Here the handle is
  # closed first and the file is removed even if closing raises.
  after do
    begin
      @ff.close if @ff # @ff is nil when the constructor itself failed
    ensure
      File.delete(index_path) if File.exist?(index_path)
    end
  end

  it 'should know about attributes' do
    expect(@ff).to be_a(MgNu::Parser::FastaHeaderIndex)
    expect(File.exist?(index_path)).to be(true)
    expect(@ff).to respond_to(:filename)
  end

  it 'should allow hash-like access using Fasta header names' do
    expect(@ff['name1']).to eq('description1')
  end
end
@@ -0,0 +1,25 @@
require 'spec_helper'

# Spec for MgNu::Parser::FastaIndex built from 'data/test.fasta'. The examples
# expect 'data/test.fasta.tch' to exist once the index has been constructed;
# teardown removes that file again.
describe MgNu::Parser::FastaIndex do
  let(:index_path) { 'data/test.fasta.tch' }

  before do
    @ff = MgNu::Parser::FastaIndex.new('data/test.fasta')
  end

  # A single teardown hook. RSpec runs multiple `after` hooks in reverse
  # declaration order, so keeping "close" and "delete" in separate hooks made
  # the file deletion run before the index was closed. Here the handle is
  # closed first and the file is removed even if closing raises.
  after do
    begin
      @ff.close if @ff # @ff is nil when the constructor itself failed
    ensure
      File.delete(index_path) if File.exist?(index_path)
    end
  end

  it 'should know about attributes' do
    expect(@ff).to be_a(MgNu::Parser::FastaIndex)
    expect(File.exist?(index_path)).to be(true)
    expect(@ff).to respond_to(:filename)
  end

  it 'should allow hash-like access using Fasta header names' do
    expect(@ff['name1'].sequence).to eq('ACCG')
  end
end