ngs-ci 0.0.1.a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +15 -0
- data/.rspec +1 -0
- data/.travis.yml +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +674 -0
- data/README.md +43 -0
- data/Rakefile +6 -0
- data/TODO.md +31 -0
- data/TODO.org +39 -0
- data/bin/ngs-ci +125 -0
- data/lib/NGSCI/calculator.rb +289 -0
- data/lib/NGSCI/cmd.rb +23 -0
- data/lib/NGSCI/read.rb +31 -0
- data/lib/NGSCI/version.rb +3 -0
- data/lib/NGSCI.rb +31 -0
- data/ngs-ci.gemspec +35 -0
- data/spec/lib/NGSCI_spec.rb +10 -0
- data/spec/lib/bin_spec.rb +51 -0
- data/spec/lib/calculator_spec.rb +316 -0
- data/spec/lib/cmd_spec.rb +17 -0
- data/spec/lib/read_spec.rb +35 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/test_files/empty.bam +0 -0
- data/spec/test_files/test.bam +0 -0
- data/spec/test_files/test.bam.bai +0 -0
- data/spec/test_files/test.fa +2 -0
- metadata +209 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
testfasta="spec/test_files/test.fa"
|
4
|
+
testbam="spec/test_files/test.bam"
|
5
|
+
|
6
|
+
# End-to-end checks of the `bin/ngs-ci` executable. Each example launches the
# script through NGSCI::Cmd and inspects the captured stdout / exit status.
describe "bin executable" do
  # Runs `bundle exec bin/ngs-ci` followed by +args+ and returns the
  # NGSCI::Cmd object after it has been run.
  def run_cli(args)
    cmd = NGSCI::Cmd.new("bundle exec bin/ngs-ci #{args}")
    cmd.run
    cmd
  end

  it "runs the --help option" do
    c = run_cli("--help")
    # Parentheses keep the regexp literal from being parsed as an ambiguous
    # first argument (Ruby warns about `match /.../`).
    expect(c.stdout).to match(/DESCRIPTION/)
    expect(c.status).to be_success
  end

  context "non-existent input files" do
    it "fails on a non-existent bam file" do
      c = run_cli("--bam foo.bam --reference #{testfasta}")
      expect(c.status).to_not be_success
    end

    it "fails on a non-existent fasta file" do
      c = run_cli("--bam #{testbam} --reference foo.fasta")
      expect(c.status).to_not be_success
    end
  end

  context "improper options" do
    it "fails on improper strand-specific argument" do
      c = run_cli("--bam #{testbam} --reference #{testfasta} --strand G")
      expect(c.status).to_not be_success
    end

    it "fails on improper loglevel argument" do
      c = run_cli("--bam #{testbam} --reference #{testfasta} --loglevel foo")
      expect(c.status).to_not be_success
    end
  end

  it "runs on test data" do
    c = run_cli("--bam #{testbam} --reference #{testfasta} --strand F")
    expect(c.status).to be_success
  end

  it "produces speed information" do
    c = run_cli("--bam #{testbam} --reference #{testfasta} --loglevel debug")
    expect(c.stdout).to include("Measure Mode:")
  end
end
|
@@ -0,0 +1,316 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bio-samtools'
|
3
|
+
|
4
|
+
testbam="spec/test_files/test.bam"
|
5
|
+
emptybam="spec/test_files/empty.bam"
|
6
|
+
testfasta="spec/test_files/test.fa"
|
7
|
+
testout="spec/test_files/testfile.txt"
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
# Exercises NGSCI::Calculator#run against the bundled test BAM/FASTA pair,
# once with strand: "FR" and once with the default options.
describe "#run" do
  context "during a strand specific run" do
    before do
      @calc = NGSCI::Calculator.new(testbam, testfasta, strand: "FR")
      @testchrom = @calc.chroms.keys.first
    end

    it "returns a hash" do
      outcome = @calc.run(runtime: false)
      expect(outcome).to be_instance_of(Hash)
    end

    it "returns the hash with keys of the chromosomes names" do
      observed = @calc.run.keys
      expect(observed).to eq(@calc.chroms.keys)
    end

    it "returns the hash with keys for each strand" do
      strands = @calc.run[@testchrom].keys
      expect(strands).to eq(["+", "-"])
    end

    it "returns NGSCI for each base of the genome" do
      plus_strand = @calc.run[@testchrom]["+"]
      expect(plus_strand.size).to eq(@calc.chroms[@testchrom])
    end
  end

  context "during an unstranded run" do
    before do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      @testchrom = @calc.chroms.keys.first
    end

    it "returns the hash with keys of the chromosomes names" do
      observed = @calc.run.keys
      expect(observed).to eq(@calc.chroms.keys)
    end

    it "returns the hash with a nil strand key" do
      first_key = @calc.run[@testchrom].keys.first
      expect(first_key).to be(nil)
    end

    it "returns NGSCI for each base of the genome" do
      unstranded = @calc.run[@testchrom][nil]
      expect(unstranded.size).to eq(@calc.chroms[@testchrom])
    end
  end
end
|
47
|
+
|
48
|
+
# Checks that Calculator#readblock yields per-key arrays whose length equals
# the calculator's block_size, for block index 0 and for block index 1.
describe "#readblock" do
  context "when reading the first block" do
    it "returns a hash with an array of length @block_size" do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      results = @calc.readblock(@calc.chroms.keys[0], 0)
      # Only the array stored under the first key is measured.
      result_length = results[results.keys[0]].size
      expect(result_length).to eq(@calc.block_size)
    end
  end

  context "when reading any other block" do
    # Description typo fixed: "with and array" -> "with an array".
    it "returns a hash with an array of length @block_size" do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      results = @calc.readblock(@calc.chroms.keys[0], 1)
      result_length = results[results.keys[0]].size
      expect(result_length).to eq(@calc.block_size)
    end
  end
end
|
66
|
+
|
67
|
+
# Specs for Calculator#sci. Only the last element of the returned array is
# inspected as the complexity index.
describe "#sci" do
  context "when passed an array of read objects" do
    before(:each) do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      @bam = Bio::DB::Sam.new(:bam => testbam, :fasta => testfasta)
      @bam.open
      @reads = []
      # Collect the converted reads at position 75, skipping alignments that
      # convert to nil.
      @bam.fetch("NC_001988.2", 75, 75) do |x|
        read = @calc.convert(x)
        @reads << read unless read.nil?
      end
      # Keep one read per start coordinate.
      @reads = @reads.uniq { |r| r.start }
    end

    it "returns an array" do
      expect(@calc.sci(@reads)).to be_kind_of(Array)
    end

    it "returns the sequencing complexity index" do
      expect(@calc.sci(@reads)[-1]).to eq(0.0)
    end
  end

  context "when passed an empty array" do
    # The example used to be titled "returns nil" although it asserts zero.
    it "returns zero" do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      empty_sci = @calc.sci([])[-1]
      expect(empty_sci).to be_zero
    end
  end
end
|
92
|
+
|
93
|
+
# Read-length detection, checked through the calculator's `buffer` value and
# through the error raised for an empty BAM file.
describe "#read_length" do
  it "calculates the read length" do
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    expect(@calc.buffer).to eq(76)
  end

  it "fails on an empty bam file" do
    begin
      expect { NGSCI::Calculator.new(emptybam, testfasta) }.to raise_error(NGSCI::NGSCIIOError)
    ensure
      # Remove the index file next to the empty BAM (presumably written while
      # the Calculator is constructed -- TODO confirm). Running this in
      # `ensure` cleans up even when the expectation fails, and File.delete
      # avoids shelling out to `rm`.
      index_file = "#{emptybam}.bai"
      File.delete(index_file) if File.exist?(index_file)
    end
  end
end
|
103
|
+
|
104
|
+
# Specs for Calculator#summed_overlaps, using reads fetched from positions
# 8..75 of NC_001988.2 in the test BAM file.
describe "#summed_overlaps" do
  it "returns an int" do
    @bam = Bio::DB::Sam.new(:bam => testbam, :fasta => testfasta)
    @bam.open
    @reads = []
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    @bam.fetch("NC_001988.2", 8, 75) do |x|
      read = @calc.convert(x)
      @reads << read if read
    end
    # Keep one read per start coordinate.
    @reads = @reads.uniq { |r| r.start }
    expect(@calc.summed_overlaps(@reads)).to be_an(Integer)
  end

  context "when passed an array of read objects" do
    before(:each) do
      @bam = Bio::DB::Sam.new(:bam => testbam, :fasta => testfasta)
      @bam.open
      @reads = []
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      @bam.fetch("NC_001988.2", 8, 75) do |x|
        read = @calc.convert(x)
        @reads << read if read
      end
      @reads = @reads.uniq { |r| r.start }
    end

    # Description corrected: the expected value is 2 * overlap, not the bare
    # overlap of the pair.
    it "returns twice the #overlap of two reads" do
      summed_overlap = 2 * @calc.overlap(@reads[0], @reads[1])
      expect(@calc.summed_overlaps(@reads[0..1])).to eq(summed_overlap)
    end

    # Description corrected: the method under test sums overlaps; the old
    # wording said "average".
    it "calculates the summed overlap between a group of reads" do
      expect(@calc.summed_overlaps(@reads[0..7]).round(4)).to eq(380.0)
    end
  end

  context "when passed an array with a single read object" do
    it "returns zero" do
      @bam = Bio::DB::Sam.new(:bam => testbam, :fasta => testfasta)
      @bam.open
      @reads = []
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      @bam.fetch("NC_001988.2", 8, 75) do |x|
        read = @calc.convert(x)
        @reads << read if read
      end
      expect(@calc.summed_overlaps([@reads[0]])).to be_zero
    end
  end

  context "when passed an empty array" do
    it "returns zero" do
      @calc = NGSCI::Calculator.new(testbam, testfasta)
      expect(@calc.summed_overlaps([])).to be_zero
    end
  end
end
|
149
|
+
|
150
|
+
# Specs for Calculator#overlap, using the third and fourth alignments fetched
# from positions 0..200 of NC_001988.2.
describe "#overlap" do
  before do
    @bam = Bio::DB::Sam.new(bam: testbam, fasta: testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |alignment| @reads << alignment }
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    @read1 = @calc.convert(@reads[2])
    @read2 = @calc.convert(@reads[3])
  end

  it "calculates the overlap between two reads" do
    shared = @calc.overlap(@read1, @read2)
    expect(shared).to eq(14)
  end

  it "calculates the overlap regardless of order" do
    shared = @calc.overlap(@read2, @read1)
    expect(shared).to eq(14)
  end
end
|
168
|
+
|
169
|
+
# Specs for Calculator#reference_sequences: the keys of its result must
# include the sequence name found in the test FASTA.
describe '#reference_sequences' do
  before do
    @calc = NGSCI::Calculator.new(testbam, testfasta)
  end

  it "retrieves reference sequences" do
    sequence_names = @calc.reference_sequences(testfasta).keys
    expect(sequence_names).to include("NC_001988.2")
  end
end
|
178
|
+
|
179
|
+
# Specs for Calculator#newread: a raw alignment fetched from the BAM file is
# turned into an NGSCI::Read.
describe '#newread' do
  before do
    @bam = Bio::DB::Sam.new(bam: testbam, fasta: testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |alignment| @reads << alignment }
    @calc = NGSCI::Calculator.new(testbam, testfasta)
  end

  it "converts an alignment object to a read object" do
    converted = @calc.newread(@reads[0])
    expect(converted).to be_instance_of(NGSCI::Read)
  end
end
|
192
|
+
|
193
|
+
# Specs for Calculator#fr, applied to the alignments at indexes 5 and 10 of
# the reads fetched from positions 0..200 of NC_001988.2.
describe "#fr" do
  before do
    @bam = Bio::DB::Sam.new(bam: testbam, fasta: testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |alignment| @reads << alignment }
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    @testpair = @reads.values_at(5, 10)
  end

  it "converts the first read with FR chemistry" do
    converted = @calc.fr(@testpair[0])
    expect(converted.strand).to eq("+")
  end

  it "converts the second read with FR chemistry" do
    converted = @calc.fr(@testpair[1])
    expect(converted.strand).to eq("+")
  end
end
|
212
|
+
|
213
|
+
# Specs for Calculator#rf, applied to the alignments at indexes 5 and 10 of
# the reads fetched from positions 0..200 of NC_001988.2.
describe "#rf" do
  before do
    @bam = Bio::DB::Sam.new(bam: testbam, fasta: testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |alignment| @reads << alignment }
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    @testpair = @reads.values_at(5, 10)
  end

  it "converts the first read with RF chemistry" do
    converted = @calc.rf(@testpair[0])
    expect(converted.strand).to eq("-")
  end

  it "converts the second read with RF chemistry" do
    converted = @calc.rf(@testpair[1])
    expect(converted.strand).to eq("-")
  end
end
|
232
|
+
|
233
|
+
# Specs for Calculator#f, applied to the alignments at indexes 5 and 10 of
# the reads fetched from positions 0..200 of NC_001988.2.
describe "#f" do
  before do
    @bam = Bio::DB::Sam.new(bam: testbam, fasta: testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |alignment| @reads << alignment }
    @calc = NGSCI::Calculator.new(testbam, testfasta)
    @testpair = @reads.values_at(5, 10)
  end

  it "converts a read with F chemistry on the + strand" do
    converted = @calc.f(@testpair[0])
    expect(converted.strand).to eq("-")
  end

  it "converts a read with F chemistry on the - strand" do
    converted = @calc.f(@testpair[1])
    expect(converted.strand).to eq("+")
  end
end
|
252
|
+
|
253
|
+
# Specs for Calculator#convert: plain conversion, the nil result for the
# alignment at index 1, and strand assignment under the "FR" and "RF" options.
describe '#convert' do
  before(:each) do
    @bam = Bio::DB::Sam.new(:bam => testbam, :fasta => testfasta)
    @bam.open
    @reads = []
    @bam.fetch("NC_001988.2", 0, 200) { |x| @reads << x }
  end

  it "converts an alignment object to a read object" do
    calc = NGSCI::Calculator.new(testbam, testfasta)
    expect(calc.convert(@reads[2])).to be_instance_of NGSCI::Read
  end

  it "returns nil for an unmapped read" do
    calc = NGSCI::Calculator.new(testbam, testfasta)
    expect(calc.convert(@reads[1])).to be_nil
  end

  it "converts the first read in FR chemistry aligned to the + strand" do
    calc = NGSCI::Calculator.new(testbam, testfasta, strand: "FR")
    testpair = [@reads[5], @reads[10]]
    first = calc.convert(testpair[1])
    expect(first.strand).to eq("+")
  end

  it "converts the second read in FR chemistry aligned to the - strand" do
    calc = NGSCI::Calculator.new(testbam, testfasta, strand: "FR")
    testpair = [@reads[5], @reads[10]]
    second = calc.convert(testpair[0])
    expect(second.strand).to eq("+")
  end

  it "converts the first read in RF chemistry aligned to the + strand" do
    calc = NGSCI::Calculator.new(testbam, testfasta, strand: "RF")
    testpair = [@reads[5], @reads[10]]
    first = calc.convert(testpair[1])
    expect(first.strand).to eq("-")
  end

  # Description corrected from "FR" to "RF": this example builds the
  # calculator with strand: "RF" and previously duplicated the title of the
  # FR example above.
  it "converts the second read in RF chemistry aligned to the - strand" do
    calc = NGSCI::Calculator.new(testbam, testfasta, strand: "RF")
    testpair = [@reads[5], @reads[10]]
    second = calc.convert(testpair[0])
    expect(second.strand).to eq("-")
  end
end
|
296
|
+
|
297
|
+
# Specs for Calculator#export: nil before the calculator has run, the output
# path afterwards.
describe "#export" do
  context "calculator has not run" do
    it "returns nil" do
      calc = NGSCI::Calculator.new(testbam, testfasta)
      expect(calc.export(testout)).to be nil
    end
  end

  context "calculator has run" do
    after(:all) do
      # Remove the exported file without shelling out to `rm`; the guard keeps
      # cleanup quiet when the example failed before anything was written.
      File.delete(testout) if File.exist?(testout)
    end

    it "returns outfile" do
      calc = NGSCI::Calculator.new(testbam, testfasta)
      calc.run
      expect(calc.export(testout)).to eq(testout)
    end
  end
end
|
315
|
+
|
316
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Specs for NGSCI::Cmd: running a shell command and reading back its stdout,
# and rendering the command as a string.
describe "command" do
  it "runs commands" do
    cmd = NGSCI::Cmd.new("echo success")
    cmd.run
    output = cmd.stdout.chomp
    expect(output).to eq("success")
  end

  it "receives commands" do
    cmd = NGSCI::Cmd.new("echo success")
    expect(cmd.to_s).to eq("echo success")
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
|
4
|
+
# Argument validation specs for NGSCI::Read.new(start, stop, strand: ...).
describe "reads" do
  it "fails to instantiate on a string start site" do
    expect { NGSCI::Read.new("foo", 3) }.to raise_error(NGSCI::NGSCIError)
  end

  it "fails to instantiate on a string stop site" do
    expect { NGSCI::Read.new(1, "foo") }.to raise_error(NGSCI::NGSCIError)
  end

  # Description corrected: the arguments are (start=3, stop=1), i.e. the
  # start lies beyond the stop. The old wording had it the other way round.
  it "fails to instantiate when the start site is greater than the stop site" do
    expect { NGSCI::Read.new(3, 1) }.to raise_error(NGSCI::NGSCIError)
  end

  it "fails to instantiate on an improper strand argument" do
    expect { NGSCI::Read.new(1, 3, strand: "foo") }.to raise_error(NGSCI::NGSCIError)
  end

  # Description corrected: two positional arguments are enough (see the
  # unstranded example below), so "three necessary arguments" was misleading.
  it "fails to instantiate without both start and stop arguments" do
    expect { NGSCI::Read.new(1) }.to raise_error(ArgumentError)
  end

  it "instantiates a new read with proper unstranded arguments" do
    expect { NGSCI::Read.new(1, 3) }.to_not raise_error
  end

  it "instantiates a new read with proper stranded arguments" do
    expect { NGSCI::Read.new(1, 3, strand: "+") }.to_not raise_error
  end
end
|
data/spec/spec_helper.rb
ADDED
Binary file
|
Binary file
|
Binary file
|