ngs-ci 0.0.1.a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ testfasta="spec/test_files/test.fa"  # reference FASTA fixture; interpolated into the --reference option below
4
+ testbam="spec/test_files/test.bam"  # BAM fixture; interpolated into the --bam option below
5
+
6
+ describe "bin executable" do
7
+
8
+ it "runs the --help option" do
9
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --help")
10
+ c.run
11
+ expect(c.stdout).to match /DESCRIPTION/
12
+ expect(c.status).to be_success
13
+ end
14
+ context "non-existent input files" do
15
+ it "fails on a non-existent bam file" do
16
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam foo.bam --reference #{testfasta}")
17
+ c.run
18
+ expect(c.status).to_not be_success
19
+ end
20
+
21
+ it "fails on a non-existent fasta file" do
22
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam #{testbam} --reference foo.fasta")
23
+ c.run
24
+ expect(c.status).to_not be_success
25
+ end
26
+ end
27
+ context "improper options" do
28
+ it "fails on improper strand-specific argument" do
29
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam #{testbam} --reference #{testfasta} --strand G")
30
+ c.run
31
+ expect(c.status).to_not be_success
32
+ end
33
+ it "fails on improper loglevel argument" do
34
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam #{testbam} --reference #{testfasta} --loglevel foo")
35
+ c.run
36
+ expect(c.status).to_not be_success
37
+ end
38
+ end
39
+ it "runs on test data" do
40
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam #{testbam} --reference #{testfasta} --strand F")
41
+ c.run
42
+ expect(c.status).to be_success
43
+ end
44
+
45
+ it "produces speed information" do
46
+ c=NGSCI::Cmd.new("bundle exec bin/ngs-ci --bam #{testbam} --reference #{testfasta} --loglevel debug")
47
+ c.run
48
+ expect(c.stdout).to include("Measure Mode:")
49
+
50
+ end
51
+ end
@@ -0,0 +1,316 @@
1
+ require 'spec_helper'
2
+ require 'bio-samtools'
3
+
4
+ testbam="spec/test_files/test.bam"  # BAM fixture used by most examples below
5
+ emptybam="spec/test_files/empty.bam"  # BAM fixture used by the "fails on an empty bam file" example
6
+ testfasta="spec/test_files/test.fa"  # reference FASTA fixture passed alongside each BAM
7
+ testout="spec/test_files/testfile.txt"  # output path handed to Calculator#export and removed after those examples
8
+
9
+
10
+
11
+ describe "#run" do  # Specs for NGSCI::Calculator#run: checks the shape of the returned hash
12
+ context "during a strand specific run" do
13
+ before(:each) do
14
+ @calc=NGSCI::Calculator.new(testbam,testfasta,strand:"FR")  # stranded calculator
15
+ @testchrom=@calc.chroms.keys[0]  # first chromosome name known to the calculator
16
+ end
17
+ it "returns a hash" do
18
+ expect(@calc.run(runtime: false)).to be_instance_of Hash
19
+ end
20
+ it "returns the hash with keys of the chromosomes names" do
21
+ expect(@calc.run.keys).to eq(@calc.chroms.keys)
22
+ end
23
+ it "returns the hash with keys for each strand" do
24
+ expect(@calc.run[@testchrom].keys).to eq(%w(+ -))  # per-chromosome result is keyed by "+" then "-"
25
+ end
26
+ it "returns NGSCI for each base of the genome" do
27
+ expect(@calc.run[@testchrom]["+"].size).to eq(@calc.chroms[@testchrom])  # presumably chroms maps name => length; TODO confirm
28
+ end
29
+ end
30
+
31
+ context "during an unstranded run" do
32
+ before(:each) do
33
+ @calc=NGSCI::Calculator.new(testbam,testfasta)  # no strand: option given
34
+ @testchrom=@calc.chroms.keys[0]
35
+ end
36
+ it "returns the hash with keys of the chromosomes names" do
37
+ expect(@calc.run.keys).to eq(@calc.chroms.keys)
38
+ end
39
+ it "returns the hash with a nil strand key" do
40
+ expect(@calc.run[@testchrom].keys[0]).to be nil  # unstranded results are stored under a nil key
41
+ end
42
+ it "returns NGSCI for each base of the genome" do
43
+ expect(@calc.run[@testchrom][nil].size).to eq(@calc.chroms[@testchrom])
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "#readblock" do
49
+ context "when reading the first block" do
50
+ it "returns a hash with an array of length @block_size" do
51
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
52
+ results=@calc.readblock(@calc.chroms.keys[0],0)
53
+ result_length=results[results.keys[0]].size
54
+ expect(result_length).to eq(@calc.block_size)
55
+ end
56
+ end
57
+ context "when reading any other block" do
58
+ it "returns a hash with and array of length @block_size" do
59
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
60
+ results=@calc.readblock(@calc.chroms.keys[0],1)
61
+ result_length=results[results.keys[0]].size
62
+ expect(result_length).to eq(@calc.block_size)
63
+ end
64
+ end
65
+ end
66
+
67
+ describe "#sci" do
68
+ context "when passed an array of read objects" do
69
+ before(:each) do
70
+ @calc = NGSCI::Calculator.new(testbam,testfasta)
71
+ @bam = Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
72
+ @bam.open
73
+ @reads = []
74
+ @bam.fetch("NC_001988.2",75,75){|x| read = @calc.convert(x); @reads << read unless read.nil?}
75
+ @reads = @reads.uniq{|r|r.start}
76
+ end
77
+ it "returns an array" do
78
+ expect(@calc.sci(@reads)).to be_kind_of(Array)
79
+ end
80
+ it "returns the sequencing complexity index" do
81
+ expect(@calc.sci(@reads)[-1]).to eq(0.0)
82
+ end
83
+ end
84
+ context "when passed an empty array" do
85
+ it "returns nil" do
86
+ @calc = NGSCI::Calculator.new(testbam,testfasta)
87
+ empty_sci = @calc.sci([])[-1]
88
+ expect(empty_sci).to be_zero
89
+ end
90
+ end
91
+ end
92
+
93
+ describe "#read_length" do
94
+ it "calculates the read length" do
95
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
96
+ expect(@calc.buffer).to eq(76)
97
+ end
98
+ it "fails on an empty bam file" do
99
+ expect{NGSCI::Calculator.new(emptybam,testfasta)}.to raise_error(NGSCI::NGSCIIOError)
100
+ `rm #{emptybam}.bai`
101
+ end
102
+ end
103
+
104
+ describe "#summed_overlaps" do  # Specs for NGSCI::Calculator#summed_overlaps on read arrays of various sizes
105
+ it "returns an int" do
106
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
107
+ @bam.open
108
+ @reads = []
109
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
110
+ @bam.fetch("NC_001988.2",8,75) {|x| read=@calc.convert(x); @reads << read if read}  # keep only alignments that convert to a read
111
+ @reads = @reads.uniq{|r| r.start}  # one read per distinct start position
112
+ expect(@calc.summed_overlaps(@reads)).to be_an(Integer)
113
+ end
114
+ context "when passed an array of read objects" do
115
+ before(:each) do  # same setup as the "returns an int" example above
116
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
117
+ @bam.open
118
+ @reads = []
119
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
120
+ @bam.fetch("NC_001988.2",8,75) {|x| read=@calc.convert(x); @reads << read if read}
121
+ @reads = @reads.uniq{|r| r.start}
122
+ end
123
+ it "returns the #overlap of two reads" do
124
+ summed_overlap = 2*@calc.overlap(@reads[0],@reads[1])  # expected value is twice the pairwise overlap, not the overlap itself
125
+ expect(@calc.summed_overlaps(@reads[0..1])).to eq(summed_overlap)
126
+ end
127
+
128
+ it "calculates the average overlap between a group of reads" do  # NOTE(review): description says "average" but the method is named summed_overlaps; confirm wording
129
+ expect(@calc.summed_overlaps(@reads[0..7]).round(4)).to eq(380.0)
130
+ end
131
+ end
132
+ context "when passed an array with a single read object" do
133
+ it "returns zero" do
134
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
135
+ @bam.open
136
+ @reads=[]
137
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
138
+ @bam.fetch("NC_001988.2",8,75) {|x| read=@calc.convert(x); @reads << read if read}
139
+ expect(@calc.summed_overlaps([@reads[0]])).to be_zero
140
+ end
141
+ end
142
+ context "when passed an empty array" do
143
+ it "returns zero" do
144
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
145
+ expect(@calc.summed_overlaps([])).to be_zero
146
+ end
147
+ end
148
+ end
149
+
150
+ describe "#overlap" do  # Specs for NGSCI::Calculator#overlap on two converted reads
151
+ before(:each) do
152
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
153
+ @bam.open
154
+ @reads=[]
155
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}  # collect the raw alignment objects yielded by fetch
156
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
157
+ @read1=@calc.convert(@reads[2])  # third and fourth fetched alignments, converted to reads
158
+ @read2=@calc.convert(@reads[3])
159
+ end
160
+ it "calculates the overlap between two reads" do
161
+ expect(@calc.overlap(@read1,@read2)).to eq(14)
162
+ end
163
+
164
+ it "calculates the overlap regardless of order" do
165
+ expect(@calc.overlap(@read2,@read1)).to eq(14)  # same expected value with the arguments swapped
166
+ end
167
+ end
168
+
169
+ describe '#reference_sequences' do  # Specs for NGSCI::Calculator#reference_sequences
170
+ before(:each) do
171
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
172
+ end
173
+
174
+ it "retrieves reference sequences" do
175
+ expect(@calc.reference_sequences(testfasta).keys).to include "NC_001988.2"  # result is keyed by sequence name
176
+ end
177
+ end
178
+
179
+ describe '#newread' do  # Specs for NGSCI::Calculator#newread
180
+ before(:each) do
181
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
182
+ @bam.open
183
+ @reads=[]
184
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}  # collect the raw alignment objects yielded by fetch
185
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
186
+ end
187
+
188
+ it "converts an alignment object to a read object" do
189
+ expect(@calc.newread(@reads[0])).to be_instance_of NGSCI::Read
190
+ end
191
+ end
192
+
193
+ describe "#fr" do  # Specs for NGSCI::Calculator#fr: both alignments of the test pair are expected on the "+" strand
194
+ before(:each) do
195
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
196
+ @bam.open
197
+ @reads=[]
198
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}  # collect the raw alignment objects yielded by fetch
199
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
200
+ @testpair=[@reads[5],@reads[10]]  # presumably the two mates of one read pair; TODO confirm against the fixture
201
+ end
202
+
203
+ it "converts the first read with FR chemistry" do
204
+ first=@calc.fr(@testpair[0])
205
+ expect(first.strand).to eq("+")
206
+ end
207
+ it "converts the second read with FR chemistry" do
208
+ second=@calc.fr(@testpair[1])
209
+ expect(second.strand).to eq("+")
210
+ end
211
+ end
212
+
213
+ describe "#rf" do  # Specs for NGSCI::Calculator#rf: both alignments of the test pair are expected on the "-" strand
214
+ before(:each) do
215
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
216
+ @bam.open
217
+ @reads=[]
218
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}  # collect the raw alignment objects yielded by fetch
219
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
220
+ @testpair=[@reads[5],@reads[10]]  # presumably the two mates of one read pair; TODO confirm against the fixture
221
+ end
222
+ it "converts the first read with RF chemistry" do
223
+ first=@calc.rf(@testpair[0])
224
+ expect(first.strand).to eq("-")
225
+ end
226
+
227
+ it "converts the second read with RF chemistry" do
228
+ second=@calc.rf(@testpair[1])
229
+ expect(second.strand).to eq("-")
230
+ end
231
+ end
232
+
233
+ describe "#f" do  # Specs for NGSCI::Calculator#f: the two alignments of the test pair are expected on opposite strands
234
+ before(:each) do
235
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
236
+ @bam.open
237
+ @reads=[]
238
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}  # collect the raw alignment objects yielded by fetch
239
+ @calc=NGSCI::Calculator.new(testbam,testfasta)
240
+ @testpair=[@reads[5],@reads[10]]  # presumably the two mates of one read pair; TODO confirm against the fixture
241
+ end
242
+ it "converts a read with F chemistry on the + strand" do  # NOTE(review): description says "+" but the expected strand is "-"; confirm intent
243
+ first=@calc.f(@testpair[0])
244
+ expect(first.strand).to eq("-")
245
+ end
246
+ it "converts a read with F chemistry on the - strand" do  # NOTE(review): description says "-" but the expected strand is "+"; confirm intent
247
+ second=@calc.f(@testpair[1])
248
+ expect(second.strand).to eq("+")
249
+ end
250
+
251
+ end
252
+
253
+ describe '#convert' do
254
+ before(:each) do
255
+ @bam=Bio::DB::Sam.new(:bam=>testbam,:fasta=>testfasta)
256
+ @bam.open
257
+ @reads=[]
258
+ @bam.fetch("NC_001988.2",0,200) {|x| @reads << x}
259
+ end
260
+ it "converts an alignment object to a read object" do
261
+ calc=NGSCI::Calculator.new(testbam,testfasta)
262
+ expect(calc.convert(@reads[2])).to be_instance_of NGSCI::Read
263
+ end
264
+
265
+ it "returns nil for an unmapped read" do
266
+ calc=NGSCI::Calculator.new(testbam,testfasta)
267
+ expect(calc.convert(@reads[1])).to be_nil
268
+ end
269
+
270
+ it "converts the first read in FR chemistry aligned to the + strand" do
271
+ calc=NGSCI::Calculator.new(testbam,testfasta,strand:"FR")
272
+ testpair=[@reads[5],@reads[10]]
273
+ first=calc.convert(testpair[1])
274
+ expect(first.strand).to eq("+")
275
+ end
276
+ it "converts the second read in FR chemistry aligned to the - strand" do
277
+ calc=NGSCI::Calculator.new(testbam,testfasta,strand:"FR")
278
+ testpair=[@reads[5],@reads[10]]
279
+ second=calc.convert(testpair[0])
280
+ expect(second.strand).to eq("+")
281
+ end
282
+ it "converts the first read in RF chemistry aligned to the + strand" do
283
+ calc=NGSCI::Calculator.new(testbam,testfasta,strand:"RF")
284
+ testpair=[@reads[5],@reads[10]]
285
+ first=calc.convert(testpair[1])
286
+ expect(first.strand).to eq("-")
287
+ end
288
+ it "converts the second read in FR chemistry aligned to the - strand" do
289
+ calc=NGSCI::Calculator.new(testbam,testfasta,strand:"RF")
290
+ testpair=[@reads[5],@reads[10]]
291
+ second=calc.convert(testpair[0])
292
+ expect(second.strand).to eq("-")
293
+ end
294
+
295
+ end
296
+
297
+ describe "#export" do
298
+ context "calculator has not run" do
299
+ it "returns nil" do
300
+ calc=NGSCI::Calculator.new(testbam,testfasta)
301
+ expect(calc.export(testout)).to be nil
302
+ end
303
+ end
304
+ context "calculator has run" do
305
+ after(:all) do
306
+ `rm #{testout}`
307
+ end
308
+ it "returns outfile" do
309
+ calc=NGSCI::Calculator.new(testbam,testfasta)
310
+ calc.run
311
+ expect(calc.export(testout)).to eq(testout)
312
+ end
313
+ end
314
+ end
315
+
316
+
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+
4
+ describe "command" do  # Specs for NGSCI::Cmd: running a shell command and reading it back as a string
5
+
6
+ it "runs commands" do
7
+ c=NGSCI::Cmd.new("echo success")
8
+ c.run
9
+ expect(c.stdout.chomp).to eq("success")  # chomp drops the trailing newline that echo emits
10
+ end
11
+
12
+ it "receives commands" do
13
+ c=NGSCI::Cmd.new("echo success")
14
+ expect(c.to_s).to eq("echo success")  # to_s returns the command string given to the constructor; #run is not called here
15
+ end
16
+
17
+ end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+
4
+ describe "reads" do
5
+
6
+ it "fails to instantiate on a string start site" do
7
+ expect{NGSCI::Read.new("foo",3)}.to raise_error(NGSCI::NGSCIError)
8
+ end
9
+
10
+ it "fails to instantiate on a string stop site" do
11
+ expect{NGSCI::Read.new(1,"foo")}.to raise_error(NGSCI::NGSCIError)
12
+ end
13
+
14
+ it "fails to instantiate when the stop site is greater than the start site" do
15
+ expect{NGSCI::Read.new(3,1)}.to raise_error(NGSCI::NGSCIError)
16
+ end
17
+
18
+ it "fails to instantiate on an improper strand argument" do
19
+ expect{NGSCI::Read.new(1,3,strand:"foo")}.to raise_error(NGSCI::NGSCIError)
20
+ end
21
+
22
+ it "fails to instantiate without the three necessary arguments" do
23
+ expect{NGSCI::Read.new(1)}.to raise_error(ArgumentError)
24
+ end
25
+
26
+ it "instantiates a new read with proper unstranded arguments" do
27
+ expect{NGSCI::Read.new(1,3)}.to_not raise_error
28
+ end
29
+
30
+ it "instantiates a new read with proper stranded arguments" do
31
+ expect{NGSCI::Read.new(1,3,strand:"+")}.to_not raise_error
32
+ end
33
+
34
+
35
+ end
@@ -0,0 +1,11 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'NGSCI'
5
+
6
+
7
+ RSpec.configure do |config|  # Global RSpec settings for the spec files that require this helper
8
+
9
+ config.color=true  # turn on RSpec's color setting
10
+
11
+ end
Binary file
Binary file
Binary file