mgnu 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +7 -0
  2. data/.yardopts +0 -0
  3. data/README.md +31 -0
  4. data/Rakefile +33 -0
  5. data/lib/mgnu.rb +9 -0
  6. data/lib/mgnu/alignment.rb +143 -0
  7. data/lib/mgnu/common.rb +68 -0
  8. data/lib/mgnu/genbank.rb +117 -0
  9. data/lib/mgnu/genbank/feature.rb +84 -0
  10. data/lib/mgnu/genbank/location.rb +150 -0
  11. data/lib/mgnu/genbank/qualifier.rb +45 -0
  12. data/lib/mgnu/genbank/reference.rb +114 -0
  13. data/lib/mgnu/genbank/source.rb +39 -0
  14. data/lib/mgnu/loggable.rb +61 -0
  15. data/lib/mgnu/parser.rb +50 -0
  16. data/lib/mgnu/parser/blast.rb +87 -0
  17. data/lib/mgnu/parser/blast/format0.rb +290 -0
  18. data/lib/mgnu/parser/blast/format7.rb +121 -0
  19. data/lib/mgnu/parser/blast/format8.rb +120 -0
  20. data/lib/mgnu/parser/blast/hsp.rb +75 -0
  21. data/lib/mgnu/parser/blast/query.rb +45 -0
  22. data/lib/mgnu/parser/blast/sbjct.rb +62 -0
  23. data/lib/mgnu/parser/clustalw.rb +72 -0
  24. data/lib/mgnu/parser/fasta.rb +61 -0
  25. data/lib/mgnu/parser/fasta_header_index.rb +39 -0
  26. data/lib/mgnu/parser/fasta_index.rb +57 -0
  27. data/lib/mgnu/parser/fastq.rb +61 -0
  28. data/lib/mgnu/parser/genbank.rb +187 -0
  29. data/lib/mgnu/parser/gff.rb +56 -0
  30. data/lib/mgnu/parser/iprscan/hit.rb +76 -0
  31. data/lib/mgnu/parser/iprscan_file.rb +39 -0
  32. data/lib/mgnu/parser/kegg_ontology_index.rb +163 -0
  33. data/lib/mgnu/parser/pilercr.rb +102 -0
  34. data/lib/mgnu/parser/prodigal.rb +170 -0
  35. data/lib/mgnu/parser/sam.rb +115 -0
  36. data/lib/mgnu/parser/sam/alignment.rb +22 -0
  37. data/lib/mgnu/parser/sam/header.rb +23 -0
  38. data/lib/mgnu/parser/sam/pair.rb +18 -0
  39. data/lib/mgnu/sequence.rb +207 -0
  40. data/lib/mgnu/sequence/fasta.rb +79 -0
  41. data/lib/mgnu/sequence/fastq.rb +43 -0
  42. data/lib/mgnu/version.rb +16 -0
  43. data/mgnu.gemspec +39 -0
  44. data/spec/mgnu/parser/blast_format0_spec.rb +114 -0
  45. data/spec/mgnu/parser/blast_format7_spec.rb +24 -0
  46. data/spec/mgnu/parser/blast_format8_spec.rb +26 -0
  47. data/spec/mgnu/parser/blast_multihsp_spec.rb +100 -0
  48. data/spec/mgnu/parser/blast_oof_spec.rb +53 -0
  49. data/spec/mgnu/parser/clustalw_spec.rb +90 -0
  50. data/spec/mgnu/parser/fasta_header_index_tc_parser_spec.rb +25 -0
  51. data/spec/mgnu/parser/fasta_index_tc_parser_spec.rb +25 -0
  52. data/spec/mgnu/parser/fasta_parser_spec.rb +53 -0
  53. data/spec/mgnu/parser_spec.rb +22 -0
  54. data/spec/mgnu/sequence/fasta_spec.rb +60 -0
  55. data/spec/mgnu/sequence/fastq_spec.rb +31 -0
  56. data/spec/mgnu/sequence_spec.rb +81 -0
  57. data/spec/mgnu_spec.rb +7 -0
  58. data/spec/spec_helper.rb +53 -0
  59. metadata +376 -0
@@ -0,0 +1,114 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'MgNu::Parser::Blast format0' do
4
+ before do
5
+ @report = MgNu::Parser::Blast.new('data/test.blast', 0)
6
+ end
7
+
8
+ it 'knows about attributes' do
9
+ expect(@report).to be_a(MgNu::Parser::Blast)
10
+ expect(@report).to respond_to(:parse)
11
+ end
12
+
13
+ describe 'after calling parse' do
14
+ before do
15
+ @queries = @report.parse
16
+ end
17
+
18
+ it 'should have the correct number of queries' do
19
+ expect(@queries.length).to eq(3)
20
+ end
21
+
22
+ it 'should contain the correct first query' do
23
+ expect(@queries[0].query_id).to eq('test')
24
+ end
25
+
26
+ it 'should have the correct number of sbjcts for query 1' do
27
+ expect(@queries[0].sbjcts.length).to eq(10)
28
+ end
29
+
30
+ it 'should have the correct number of sbjcts for query 2' do
31
+ expect(@queries[1].sbjcts.length).to eq(10)
32
+ end
33
+
34
+ describe 'the third query' do
35
+ before do
36
+ @query = @queries[2]
37
+ end
38
+
39
+ it 'should have a correct database name' do
40
+ expect(@query.database).to eq('NCBI Protein Reference Sequences')
41
+ end
42
+
43
+ it 'should respond to sbjcts' do
44
+ expect(@query).to respond_to(:sbjcts)
45
+ expect(@query.sbjcts).to be_a(Array)
46
+ expect(@query.sbjcts.length).to eq(10)
47
+ end
48
+
49
+ describe 'the first sbjct of the third query' do
50
+ before do
51
+ @sbjct = @query.sbjcts[0]
52
+ end
53
+
54
+ it 'should have number 1' do
55
+ expect(@sbjct.number).to eq(1)
56
+ end
57
+
58
+ it 'should have the correct sbjct name' do
59
+ expect(@sbjct.sbjct_id).to eq('ref|ZP_02544218.1|')
60
+ end
61
+
62
+ it 'should have the correct length' do
63
+ expect(@sbjct.length).to eq(121)
64
+ end
65
+
66
+ it 'should have a non-empty array of Hsps' do
67
+ expect(@sbjct.hsps.length).to be > 0
68
+ end
69
+
70
+ it 'should respond to best_hsp and return an Hsp object' do
71
+ expect(@query.sbjcts[0]).to respond_to(:best_hsp)
72
+ expect(@sbjct.best_hsp).to be_a(MgNu::Parser::Blast::Hsp)
73
+ end
74
+
75
+ describe 'the first hsp of the first sbjct of the third query' do
76
+ before do
77
+ @hsp = @sbjct.hsps[0]
78
+ end
79
+
80
+ it 'should have a known query sequence' do
81
+ str = 'MKKIL-ATIXSAALYGLP----AXVMAQGITDDLSNLGLNXFGNETNLGTNIALIGTIARIINILLGFLGVLAVILVLWGGFKWMTAAGDEAKIGEAKKLMGAGVIGLVIILAAFAIASFVVNQL'
82
+ expect(@hsp.query_sequence).to eq(str)
83
+ end
84
+
85
+ it 'should have a known midline' do
86
+ str = 'MKK L A + S + P A V A ++ S + GN T+L + I I+NILL G +AVI+++ GG +++ ++GD ++ AK + VIGL++++ A+AI +FVV +'
87
+ expect(@hsp.midline).to eq(str)
88
+ end
89
+
90
+ it 'should have a known sbjct sequence' do
91
+ str = 'MKKFLIAALVSLGIVVTPLAMDAPVFANAKSEVTSGVSSVNDGNSTDLPS------FITNIVNILLFLAGAVAVIVIIIGGIRYVMSSGDAGQVQSAKNTILYAVIGLIVVIMAYAIVNFVVTNV'
92
+ expect(@hsp.sbjct_sequence).to eq(str)
93
+ end
94
+
95
+ it 'should have the correct bit score and raw score' do
96
+ expect(@hsp.bit_score).to eq(62.0)
97
+ expect(@hsp.score).to eq(149)
98
+ end
99
+
100
+ it 'should have a correct evalue' do
101
+ expect(@hsp.evalue).to eq(1e-10)
102
+ end
103
+
104
+ it 'should have a non-nil query|sbjct_from and _to values' do
105
+ expect(@hsp.query_from).not_to be_nil
106
+ expect(@hsp.query_to).not_to be_nil
107
+ expect(@hsp.sbjct_from).not_to be_nil
108
+ expect(@hsp.sbjct_to).not_to be_nil
109
+ end
110
+ end
111
+ end
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'MgNu::Parser::Blast format7' do
4
+ before do
5
+ @report = MgNu::Parser::Blast.new('data/test.blast.xml')
6
+ @queries = @report.parse
7
+ end
8
+
9
+ it 'should correctly parse' do
10
+ expect(@queries.length).to eq(8)
11
+ end
12
+
13
+ it 'should report the correct information for queries' do
14
+ query_10 = @queries.select{|x| x.number == 10}[0]
15
+ expect(query_10).to be_a(MgNu::Parser::Blast::Query)
16
+ expect(query_10.number).to eq(10)
17
+ expect(query_10.sbjcts.length).to eq(7)
18
+
19
+ sbjct_6 = query_10.sbjcts.select{|x| x.number == 6}[0]
20
+ expect(sbjct_6.sbjct_id).to eq('gi|31376410|gb|AC096051.7|')
21
+ expect(sbjct_6.hsps.length).to eq(1)
22
+ expect(sbjct_6.hsps[0].evalue).to eq(2.97608)
23
+ end
24
+ end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'MgNu::Parser::Blast format8' do
4
+ before do
5
+ @report = MgNu::Parser::Blast.new('./data/test.blast.m8', 8)
6
+ @queries = @report.parse
7
+ expect(@report).to be_a(MgNu::Parser::Blast)
8
+ end
9
+
10
+ it 'should correctly parse' do
11
+ expect(@queries.length).to eq(2)
12
+ end
13
+
14
+ it 'should report the correct information for queries' do
15
+ expect(@queries[0].sbjcts.length).to eq(3)
16
+ expect(@queries[1].sbjcts.length).to eq(3)
17
+
18
+ expect(@queries[0].sbjcts[0].hsps.length).to eq(3)
19
+ expect(@queries[0].sbjcts[1].hsps.length).to eq(3)
20
+ expect(@queries[0].sbjcts[2].hsps.length).to eq(3)
21
+
22
+ expect(@queries[1].sbjcts[0].hsps.length).to eq(8)
23
+ expect(@queries[1].sbjcts[1].hsps.length).to eq(1)
24
+ expect(@queries[1].sbjcts[2].hsps.length).to eq(1)
25
+ end
26
+ end
@@ -0,0 +1,100 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'MgNu::Parser::Blast parsing a multihsp report' do
4
+ before do
5
+ @report = MgNu::Parser::Blast.new('data/multihsp.blast', 0)
6
+ @queries = @report.parse
7
+ @query = @queries[0]
8
+ @sbjct = @query.sbjcts[0]
9
+ end
10
+
11
+ it 'should have one query' do
12
+ expect(@queries.length).to eq(1)
13
+ end
14
+
15
+ it 'should have 1 sbjcts for query 1' do
16
+ expect(@queries[0].sbjcts.length).to eq(1)
17
+ end
18
+
19
+ it 'should have the correct sbjct_id for query 1' do
20
+ expect(@sbjct.sbjct_id).to eq('Contig17100_1_5')
21
+ end
22
+
23
+ it 'should have the correct sbjct length' do
24
+ expect(@sbjct.length).to eq(2190)
25
+ end
26
+
27
+ it 'should have a non-empty array of Hsps' do
28
+ expect(@sbjct.hsps.length).to be > 0
29
+ end
30
+
31
+ describe 'Query 1, Sbjct 1, Hsp 1' do
32
+ before do
33
+ @hsp = @sbjct.hsps[0]
34
+ end
35
+
36
+ it 'should have a known query sequence' do
37
+ str = 'gatcgacggcaagct'
38
+ expect(@hsp.query_sequence).to eq(str)
39
+ end
40
+
41
+ it 'should have a known midline' do
42
+ str = '|||||||||||||||'
43
+ expect(@hsp.midline).to eq(str)
44
+ end
45
+
46
+ it 'should have a known sbjct sequence' do
47
+ str = 'gatcgacggcaagct'
48
+ expect(@hsp.sbjct_sequence).to eq(str)
49
+ end
50
+
51
+ it 'should have the correct bit score and raw score' do
52
+ expect(@hsp.bit_score).to eq(30.2)
53
+ expect(@hsp.score).to eq(15)
54
+ end
55
+
56
+ it 'should have a correct evalue' do
57
+ expect(@hsp.evalue).to eq(0.12)
58
+ end
59
+ end # end - hsp 1 context
60
+
61
+ describe 'Query 1, Sbjct 1, Hsp 4' do
62
+ before do
63
+ @hsp = @sbjct.hsps[3]
64
+ end
65
+
66
+ it 'should have a known query sequence' do
67
+ str = 'gatcgataaagtg'
68
+ expect(@hsp.query_sequence).to eq(str)
69
+ end
70
+
71
+ it 'should have a known midline' do
72
+ str = '|||||||||||||'
73
+ expect(@hsp.midline).to eq(str)
74
+ end
75
+
76
+ it 'should have a known sbjct sequence' do
77
+ str = 'gatcgataaagtg'
78
+ expect(@hsp.sbjct_sequence).to eq(str)
79
+ end
80
+
81
+ it 'should have the correct bit score and raw score' do
82
+ expect(@hsp.bit_score).to eq(26.3)
83
+ expect(@hsp.score).to eq(13)
84
+ end
85
+
86
+ it 'should have a correct evalue' do
87
+ expect(@hsp.evalue).to eq(1.8)
88
+ end
89
+
90
+ it 'should have the correct query start/stop positions' do
91
+ expect(@hsp.query_from).to eq(64175)
92
+ expect(@hsp.query_to).to eq(64187)
93
+ end
94
+
95
+ it 'should have the correct sbjct start/stop positions' do
96
+ expect(@hsp.sbjct_from).to eq(1566)
97
+ expect(@hsp.sbjct_to).to eq(1578)
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'MgNu::Parser::Blast parsing a oof report' do
4
+ before do
5
+ @report = MgNu::Parser::Blast.new('data/test_blastx_oof.blast', 0)
6
+ @queries = @report.parse
7
+ @query = @queries[0]
8
+ end
9
+
10
+ it 'should have 1 query' do
11
+ expect(@queries.length).to eq(1)
12
+ end
13
+
14
+ it 'should have 1 sbjcts for query 1' do
15
+ expect(@query.sbjcts.length).to eq(1)
16
+ end
17
+
18
+ describe 'Query 1, Sbjct 1' do
19
+ before do
20
+ @sbjct = @query.sbjcts[0]
21
+ end
22
+
23
+ it 'should have the correct sbjct_id' do
24
+ expect(@sbjct.sbjct_id).to eq('spe:Spro_0261')
25
+ end
26
+
27
+ it 'should have the correct length' do
28
+ expect(@sbjct.length).to eq(729)
29
+ end
30
+
31
+ it 'should have a non-empty array of Hsps' do
32
+ expect(@sbjct.hsps.length).to be > 0
33
+ end
34
+
35
+ describe 'Query 1, Sbjct 1, Hsp 1' do
36
+ before do
37
+ @hsp = @sbjct.hsps[0]
38
+ end
39
+
40
+ it 'should contain one frame-shift characters in the query sequence' do
41
+ expect(@hsp.query_sequence.split(%r{\/}).length).to be(2)
42
+ end
43
+
44
+ it 'should respond to query_frameshifts and return a hash with the correct key/value pair' do
45
+ expect(@hsp).to respond_to(:query_frameshifts)
46
+ expect(@hsp.query_frameshifts).to be_a(Hash)
47
+ expect(@hsp.query_frameshifts.keys.length).to eq(1)
48
+ expect(@hsp.query_frameshifts.keys[0]).to eq(576)
49
+ expect(@hsp.query_frameshifts.values[0]).to eq(1)
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,90 @@
1
+ require 'spec_helper'
2
+
3
+ describe MgNu::Parser::ClustalW do
4
+ before do
5
+ @clw = MgNu::Parser::ClustalW.new('data/clustalw_test.aln')
6
+ end
7
+
8
+ it 'should be a correct object' do
9
+ expect(@clw).to be_a(MgNu::Parser::ClustalW)
10
+ expect(@clw).to respond_to(:file)
11
+ expect(@clw).to respond_to(:buffer)
12
+ expect(@clw).to respond_to(:alignment)
13
+ end
14
+
15
+ it 'should be parsed after creation' do
16
+ expect(@clw.buffer.length).to be > 0
17
+ end
18
+
19
+ it 'return a MgNu::Alignment' do
20
+ expect(@clw.alignment).to be_a(MgNu::Alignment)
21
+ end
22
+
23
+ describe 'MgNu::Alignment' do
24
+ before do
25
+ @a = @clw.alignment
26
+ end
27
+
28
+ it 'should act like an array' do
29
+ count = 0
30
+ @a.each do |s|
31
+ count += 1
32
+ expect(s).to be_a(String)
33
+ end
34
+ expect(count).to eq(7)
35
+ end
36
+
37
+ it 'can iterate by column' do
38
+ count = 0
39
+ @a.each_position do
40
+ count += 1
41
+ end
42
+ expect(count).to eq(@a.length)
43
+ expect(@a.each_position).to be_a(Array)
44
+ end
45
+
46
+ it 'can iterate of a range of columns' do
47
+ count = 0
48
+ @a.each_position(10..20) do
49
+ count += 1
50
+ end
51
+ r = []
52
+ (10..20).each { |x| r << x }
53
+ expect(count).to eq(r.length)
54
+ end
55
+
56
+ it 'can fetch a single position in in the alignment' do
57
+ m = nil
58
+ @a.each_position(3) do |pos|
59
+ m = pos
60
+ end
61
+ expect(m).to be_a(Array)
62
+ expect(m[0]).to eq('K')
63
+ expect(m[1]).to eq('N')
64
+ end
65
+
66
+ it 'should behave like an enumerable' do
67
+ expect(@a[10..20]).to be_a(Array)
68
+ m = @a[3]
69
+ expect(m).to be_a(Array)
70
+ expect(m[0][0]).to eq('K')
71
+ expect(m[0][1]).to eq('N')
72
+ end
73
+
74
+ it 'reports the correct match length' do
75
+ expect(@a.match.length).to eq(@a.length)
76
+ end
77
+
78
+ it 'reports correct match data using a range operator' do
79
+ expect(@a.match(107..112)).to eq('. : .*')
80
+ end
81
+
82
+ it 'reports single position match values correctly' do
83
+ expect(@a.match(3)).to eq(' ')
84
+ expect(@a.match(107)).to eq('.')
85
+ expect(@a.match(108)).to eq(' ')
86
+ expect(@a.match(109)).to eq(':')
87
+ expect(@a.match(112)).to eq('*')
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe MgNu::Parser::FastaHeaderIndex do
4
+ before do
5
+ @ff = MgNu::Parser::FastaHeaderIndex.new('data/test.fasta')
6
+ end
7
+
8
+ it 'should know about attributes' do
9
+ expect(@ff).to be_a(MgNu::Parser::FastaHeaderIndex)
10
+ expect(File.exist?('data/test.fasta.hdr.tch')).to be(true)
11
+ expect(@ff).to respond_to(:filename)
12
+ end
13
+
14
+ it 'should allow hash-like access using Fasta header names' do
15
+ expect(@ff['name1']).to eq('description1')
16
+ end
17
+
18
+ after(:each) do
19
+ @ff.close
20
+ end
21
+
22
+ after do
23
+ File.delete('data/test.fasta.hdr.tch') if File.exist?('data/test.fasta.hdr.tch')
24
+ end
25
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe MgNu::Parser::FastaIndex do
4
+ before do
5
+ @ff = MgNu::Parser::FastaIndex.new('data/test.fasta')
6
+ end
7
+
8
+ it 'should know about attributes' do
9
+ expect(@ff).to be_a(MgNu::Parser::FastaIndex)
10
+ expect(File.exist?('data/test.fasta.tch')).to be(true)
11
+ expect(@ff).to respond_to(:filename)
12
+ end
13
+
14
+ it 'should allow hash-like access using Fasta header names' do
15
+ expect(@ff['name1'].sequence).to eq('ACCG')
16
+ end
17
+
18
+ after(:each) do
19
+ @ff.close
20
+ end
21
+
22
+ after do
23
+ File.delete('data/test.fasta.tch') if File.exist?('data/test.fasta.tch')
24
+ end
25
+ end