bio-samtools 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +24 -0
- data/LICENSE.txt +702 -0
- data/README.rdoc +85 -0
- data/Rakefile +59 -0
- data/VERSION +1 -0
- data/bio-samtools.gemspec +105 -0
- data/ext/mkrf_conf.rb +65 -0
- data/lib/bio-samtools.rb +2 -0
- data/lib/bio/.DS_Store +0 -0
- data/lib/bio/db/sam.rb +325 -0
- data/lib/bio/db/sam/bam.rb +210 -0
- data/lib/bio/db/sam/external/COPYING +21 -0
- data/lib/bio/db/sam/external/VERSION +1 -0
- data/lib/bio/db/sam/faidx.rb +21 -0
- data/lib/bio/db/sam/library.rb +25 -0
- data/lib/bio/db/sam/sam.rb +84 -0
- data/test/basictest.rb +308 -0
- data/test/coverage.rb +26 -0
- data/test/coverage_plot.rb +28 -0
- data/test/feature.rb +0 -0
- data/test/helper.rb +18 -0
- data/test/samples/small/ids2.txt +1 -0
- data/test/samples/small/sorted.bam +0 -0
- data/test/samples/small/test +0 -0
- data/test/samples/small/test.bam +0 -0
- data/test/samples/small/test.fa +20 -0
- data/test/samples/small/test.fai +0 -0
- data/test/samples/small/test.sai +0 -0
- data/test/samples/small/test.tam +10 -0
- data/test/samples/small/test_chr.fasta +1000 -0
- data/test/samples/small/test_chr.fasta.amb +2 -0
- data/test/samples/small/test_chr.fasta.ann +3 -0
- data/test/samples/small/test_chr.fasta.bwt +0 -0
- data/test/samples/small/test_chr.fasta.fai +1 -0
- data/test/samples/small/test_chr.fasta.pac +0 -0
- data/test/samples/small/test_chr.fasta.rbwt +0 -0
- data/test/samples/small/test_chr.fasta.rpac +0 -0
- data/test/samples/small/test_chr.fasta.rsa +0 -0
- data/test/samples/small/test_chr.fasta.sa +0 -0
- data/test/samples/small/testu.bam +0 -0
- data/test/samples/small/testu.bam.bai +0 -0
- data/test/test_bio-samtools.rb +7 -0
- metadata +185 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
# require 'rubygems'
|
2
|
+
# require'ffi'
|
3
|
+
# require 'bio/db/sam/faidx'
|
4
|
+
# require 'bio/db/sam/sam'
|
5
|
+
module Bio
|
6
|
+
class DB
|
7
|
+
module SAM
|
8
|
+
module Tools
|
9
|
+
extend FFI::Library
|
10
|
+
|
11
|
+
#ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.dylib')}"
|
12
|
+
ffi_lib Bio::DB::SAM::Library.filename
|
13
|
+
|
14
|
+
# Alignment flag bits. Every value is a distinct power of two so the flags
# can be OR-ed together into one integer.
BAM_FPAIRED      = 0x1
BAM_FPROPER_PAIR = 0x2
BAM_FUNMAP       = 0x4
BAM_FMUNMAP      = 0x8
BAM_FREVERSE     = 0x10
BAM_FMREVERSE    = 0x20
BAM_FREAD1       = 0x40
BAM_FREAD2       = 0x80
BAM_FSECONDARY   = 0x100
BAM_FQCFAIL      = 0x200
BAM_FDUP         = 0x400

# Selector values 0..2 (presumably output formats for the flag column --
# TODO confirm against bam.h).
BAM_OFDEC = 0
BAM_OFHEX = 1
BAM_OFSTR = 2

# Default mask: unmapped | secondary | QC-fail | duplicate (4|256|512|1024).
BAM_DEF_MASK = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)

# CIGAR packing: the low BAM_CIGAR_SHIFT bits are selected by BAM_CIGAR_MASK.
BAM_CIGAR_SHIFT = 4
BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1)

# CIGAR operation codes.
BAM_CMATCH     = 0
BAM_CINS       = 1
BAM_CDEL       = 2
BAM_CREF_SKIP  = 3
BAM_CSOFT_CLIP = 4
BAM_CHARD_CLIP = 5
BAM_CPAD       = 6
|
38
|
+
# FFI struct holding the fixed-size core fields of an alignment record.
#
# The C declaration packs two groups of members as bit fields:
#   uint32_t bin:16, qual:8, l_qname:8;
#   uint32_t flag:16, n_cigar:16;
# They are mapped here as separate 16/8/8-bit and 16/16-bit members.
class Bam1CoreT < FFI::Struct
  layout :tid,     :int32_t,
         :pos,     :int32_t,
         :bin,     :uint16,
         :qual,    :uint8,
         :l_qname, :uint8,
         :flag,    :uint16,
         :n_cigar, :uint16,
         :l_qseq,  :int32_t,
         :mtid,    :int32_t,
         :mpos,    :int32_t,
         :isize,   :int32_t
end
|
55
|
+
# FFI struct for one alignment record: the embedded core fields followed by
# the length bookkeeping and a pointer to the variable-length data buffer.
class Bam1T < FFI::Struct
  # Lookup string with one character per 4-bit code (16 entries).
  Bam_NT16_Rev_Table = "=ACMGRSVTWYHKDBN"

  layout :core,     Bam1CoreT,
         :l_aux,    :int,
         :data_len, :int,
         :m_data,   :int,
         :data,     :pointer

  # Query name of the record.
  #
  # Mirrors the C macro bam1_qname(b) ((char*)((b)->data)): the name is the
  # NUL-terminated string at the start of the data buffer.
  def qname
    self[:data].read_string
  end
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
# --- Text (SAM/TAM) file access ---------------------------------------------
attach_function :sam_open,         [:string],                     :pointer
attach_function :sam_close,        [:pointer],                    :void
attach_function :sam_read1,        [:pointer, :pointer, :pointer], :int
attach_function :sam_header_read2, [:string],                     :pointer
attach_function :sam_header_read,  [:pointer],                    :pointer
attach_function :sam_header_parse, [:pointer],                    :int

# Left unbound on purpose:
#attach_function :sam_header_parse_rg, [ :pointer ], :int This is declared in the .h file, but is not implemented
#attach_function :bam_strmap_put, [ :pointer, :string, :string ], :int
#attach_function :bam_strmap_get, [ :pointer, :string ], :string
#attach_function :bam_strmap_dup, [ :pointer ], :pointer
#attach_function :bam_strmap_init, [ ], :pointer
#attach_function :bam_strmap_destroy, [ :pointer ], :void

# --- BAM header handling ----------------------------------------------------
attach_function :bam_header_init,    [],                    :pointer
attach_function :bam_header_destroy, [:pointer],            :void
attach_function :bam_header_read,    [:pointer],            :pointer
attach_function :bam_header_write,   [:pointer, :pointer],  :int

# --- BAM record reading, writing and formatting -----------------------------
attach_function :bam_read1,        [:pointer, :pointer],                 :int
attach_function :bam_write1_core,  [:pointer, :pointer, :int, :pointer], :int
attach_function :bam_write1,       [:pointer, :pointer],                 :int
attach_function :bam_format1,      [:pointer, :pointer],                 :string
attach_function :bam_format1_core, [:pointer, :pointer, :int],           :string
attach_function :bam_get_library,  [:pointer, :pointer],                 :string
|
98
|
+
# FFI struct describing one read's contribution to a pileup column.
#
# NOTE(review): is_del, is_head and is_tail are mapped as three separate
# 32-bit members. If the C header declares them as 1-bit fields sharing a
# single uint32_t, this layout does not match -- confirm against bam.h.
class BamPileup1T < FFI::Struct
  layout :b,       :pointer,
         :qpos,    :int32_t,
         :indel,   :int,
         :level,   :int,
         :is_del,  :uint32,
         :is_head, :uint32,
         :is_tail, :uint32
end
|
109
|
+
# --- Pileup buffers ---------------------------------------------------------
attach_function :bam_plbuf_set_mask, [:pointer, :int], :void
# Callback type used by the pileup functions below; it must be declared
# before any attach_function that names it.
callback :bam_pileup_f, [:uint32, :uint32, :int, :pointer, :pointer], :int
attach_function :bam_plbuf_reset,    [:pointer],                               :void
attach_function :bam_plbuf_init,     [:bam_pileup_f, :pointer],                :pointer
attach_function :bam_plbuf_destroy,  [:pointer],                               :void
attach_function :bam_plbuf_push,     [:pointer, :pointer],                     :int
attach_function :bam_pileup_file,    [:pointer, :int, :bam_pileup_f, :pointer], :int
attach_function :bam_lplbuf_reset,   [:pointer],                               :void
attach_function :bam_lplbuf_init,    [:bam_pileup_f, :pointer],                :pointer
attach_function :bam_lplbuf_destroy, [:pointer],                               :void
attach_function :bam_lplbuf_push,    [:pointer, :pointer],                     :int

# --- BAM index --------------------------------------------------------------
attach_function :bam_index_build,   [:string],  :int
attach_function :bam_index_load,    [:string],  :pointer
attach_function :bam_index_destroy, [:pointer], :void

# --- Region fetching --------------------------------------------------------
# Callback invoked by bam_fetch; C signature:
#   typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
callback :bam_fetch_f, [:pointer, :pointer], :int
attach_function :bam_fetch,        [:pointer, :pointer, :int, :int, :int, :pointer, :bam_fetch_f], :int
attach_function :bam_parse_region, [:pointer, :pointer, :pointer, :pointer, :pointer],             :int

# --- Auxiliary (tag) fields -------------------------------------------------
# The second parameter (the tag) must be only 2 characters long.
attach_function :bam_aux_get, [:pointer, :string], :pointer
attach_function :bam_aux2i,   [:pointer],          :int32_t
attach_function :bam_aux2f,   [:pointer],          :float
attach_function :bam_aux2d,   [:pointer],          :double
attach_function :bam_aux2A,   [:pointer],          :char
attach_function :bam_aux2Z,   [:pointer],          :string
attach_function :bam_aux_del, [:pointer, :pointer], :int
# The second parameter (the tag) must be only 2 characters long.
attach_function :bam_aux_append,   [:pointer, :string, :char, :int, :pointer], :void
# The second parameter (the tag) must be only 2 characters long.
attach_function :bam_aux_get_core, [:pointer, :string], :pointer

# --- Alignment geometry -----------------------------------------------------
attach_function :bam_calend,     [:pointer, :pointer], :uint32
attach_function :bam_cigar2qlen, [:pointer, :pointer], :int32_t
|
146
|
+
|
147
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
148
|
+
#the static inline.
|
149
|
+
#attach_function :bam_reg2bin, [ :uint32, :uint32 ], :int
|
150
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
151
|
+
#the static inline.
|
152
|
+
#attach_function :bam_copy1, [ :pointer, :pointer ], :pointer
|
153
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
154
|
+
#the static inline.
|
155
|
+
#attach_function :bam_dup1, [ :pointer ], :pointer
|
156
|
+
|
157
|
+
|
158
|
+
#bam sort
|
159
|
+
# @abstract Sort an unsorted BAM file based on the chromosome order
|
160
|
+
# and the leftmost position of an alignment
|
161
|
+
#
|
162
|
+
# @param is_by_qname whether to sort by query name
|
163
|
+
# @param fn name of the file to be sorted
|
164
|
+
# @param prefix prefix of the output and the temporary files; upon
|
165
|
+
# success, prefix.bam will be written.
|
166
|
+
# @param max_mem approximate maximum memory (very inaccurate)
|
167
|
+
#
|
168
|
+
# @discussion It may create multiple temporary subalignment files
|
169
|
+
# and then merge them by calling bam_merge_core(). This function is
|
170
|
+
# NOT thread safe.
|
171
|
+
attach_function :bam_sort_core, [:int, :string, :string, :int], :void
|
172
|
+
# Sorts a BAM file by delegating to the native bam_sort_core binding.
#
# bam_filename      - path of the BAM file to sort.
# bam_output_prefix - prefix of the output (and temporary) files; the native
#                     routine writes "<prefix>.bam" on success.
# is_by_name        - 0 (default) to sort by coordinate, non-zero to sort by
#                     query name. Previously hard-coded to 0.
# max_mem           - approximate memory budget handed to the sorter.
#                     Previously hard-coded to 500000000. The binding declares
#                     this argument as :int, so it must fit a C int.
#
# Returns whatever bam_sort_core returns (the binding is declared :void).
# The two-argument call form behaves exactly as before.
def self.bam_sort(bam_filename, bam_output_prefix, is_by_name = 0, max_mem = 500000000)
  bam_sort_core(is_by_name, bam_filename, bam_output_prefix, max_mem)
end
|
177
|
+
|
178
|
+
# @abstract Merge multiple sorted BAM.
|
179
|
+
# @param is_by_qname whether to sort by query name
|
180
|
+
# @param out output BAM file name
|
181
|
+
# @param headers name of SAM file from which to copy '@' header lines,
|
182
|
+
# or NULL to copy them from the first file to be merged
|
183
|
+
# @param n number of files to be merged
|
184
|
+
# @param fn names of files to be merged
|
185
|
+
#
|
186
|
+
# @discussion Padding information may NOT be correctly maintained. This
|
187
|
+
# function is NOT thread safe.
|
188
|
+
# int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn,
|
189
|
+
# int flag, const char *reg)
|
190
|
+
|
191
|
+
# attach_function :bam_merge_core, [:int, :string, :string, :int, :pointer, :int, :string], :int
|
192
|
+
# def self.bam_merge(bam_output_file_name, bam_array_input_file_names, rg)
|
193
|
+
# is_by_qname = 0
|
194
|
+
# headers = ""
|
195
|
+
# flag = 0
|
196
|
+
# ary = bam_array_input_file_names.map do |filename|
|
197
|
+
# FFI::MemoryPointer.from_string(filename)
|
198
|
+
# end
|
199
|
+
# ary << nil
|
200
|
+
# fns=FFI::MemoryPointer.new :pointer, ary.size
|
201
|
+
# ary.each_with_index do |p_filename, idx|
|
202
|
+
# fns[idx].put_pointer(0, p_filename)
|
203
|
+
# end
|
204
|
+
#
|
205
|
+
# bam_merge_core(is_by_qname, bam_output_file_name, headers, bam_array_input_file_names.size, fns, flag, rg)
|
206
|
+
# end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008-2009 Genome Research Ltd.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
0.1.16
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#require 'rubygems'
|
2
|
+
#require'ffi'
|
3
|
+
#require 'bio/db/sam/bam'
|
4
|
+
module Bio
|
5
|
+
class DB
|
6
|
+
module SAM
|
7
|
+
# FASTA index (faidx) bindings, attached to the same shared library as the
# rest of the Tools module.
module Tools
  extend FFI::Library
  #ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.dylib')}"
  ffi_lib Bio::DB::SAM::Library.filename

  # Index creation, loading and disposal.
  attach_function :fai_build,   [:string],  :int
  attach_function :fai_destroy, [:pointer], :void
  attach_function :fai_load,    [:string],  :pointer

  # Sequence retrieval from a loaded index.
  attach_function :fai_fetch,        [:pointer, :string, :pointer],             :string
  attach_function :faidx_fetch_nseq, [:pointer],                                :int
  attach_function :faidx_fetch_seq,  [:pointer, :string, :int, :int, :pointer], :string
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Bio
|
2
|
+
class DB
|
3
|
+
module SAM
|
4
|
+
module Library
  #IMPORTANT NOTE: Windows library is missing in this distribution

  # Returns the absolute path, including the file name, of the bundled
  # libbam shared library for the current operating system. The library is
  # expected in the 'external' directory next to this file.
  #
  # Raises LoadError when RUBY_PLATFORM is not a recognised platform, instead
  # of returning the unusable path "libbam." (which only failed later, inside
  # ffi_lib, with a less helpful message).
  def filename
    #TODO refactor this piece of code in all the files
    lib_os = case RUBY_PLATFORM
             when /linux/
               'so.1'
             when /darwin/
               '1.dylib'
             # Windows builds of Ruby report "mswin" or "mingw", never
             # "windows"; the old pattern is kept for compatibility.
             when /mswin|mingw|windows/
               'dll'
             else
               raise LoadError, "bio-samtools: no bundled libbam for platform '#{RUBY_PLATFORM}'"
             end

    File.join(File.expand_path(File.dirname(__FILE__)),'external',"libbam.#{lib_os}")
  end #filename
  module_function :filename
end #Library
|
23
|
+
end #Sam
|
24
|
+
end #DB
|
25
|
+
end #Bio
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# sam.rb
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Created by Ricardo Ramirez on 3/25/10.
|
6
|
+
#
|
7
|
+
# require 'rubygems'
|
8
|
+
# require'ffi'
|
9
|
+
# require 'bio/db/sam/bam'
|
10
|
+
module Bio
|
11
|
+
class DB
|
12
|
+
module SAM
|
13
|
+
module Tools
|
14
|
+
extend FFI::Library
|
15
|
+
|
16
|
+
# ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.#{lib_os}')}"
|
17
|
+
ffi_lib Bio::DB::SAM::Library.filename
|
18
|
+
|
19
|
+
# typedef struct {
|
20
|
+
# int32_t n_targets;
|
21
|
+
# char **target_name;
|
22
|
+
# uint32_t *target_len;
|
23
|
+
# void *dict, *hash, *rg2lib;
|
24
|
+
# int l_text;
|
25
|
+
# char *text;
|
26
|
+
# } bam_header_t;
|
27
|
+
# FFI mapping of the C bam_header_t struct:
#   int32_t n_targets; char **target_name; uint32_t *target_len;
#   void *dict, *hash, *rg2lib; int l_text; char *text;
class BamHeaderT < FFI::Struct
  layout(
    :n_targets, :int32_t,
    :target_name, :pointer,
    :target_len, :pointer,
    :dict, :pointer,
    :hash, :pointer,
    :rg2lib, :pointer,
    :l_text, :int,
    :text, :pointer
  )

  # Stores +str+ as the header text.
  #
  # The string is copied into native memory. The MemoryPointer is kept in
  # @text so the buffer stays referenced for as long as this struct object is.
  def text=(str)
    @text = FFI::MemoryPointer.from_string(str)
    self[:text] = @text
  end

  # Returns the header text as a Ruby String, or nil when the :text member
  # is a NULL pointer.
  #
  # The value is read from the struct member itself rather than from @text,
  # so it also works for headers filled in by the C library (where text= was
  # never called and @text is nil).
  def text
    ptr = self[:text]
    ptr.null? ? nil : ptr.get_string(0)
  end

end
|
47
|
+
|
48
|
+
# Union member of SamfileT: a single pointer slot that is read as one of
# three handle kinds.
class SamfileTX < FFI::Union
  layout :tamr, :pointer, # Text file, read.
         :bam,  :pointer, # bamFile,
         :tamw, :pointer  # Text file, write.
end
|
55
|
+
# typedef struct {
|
56
|
+
# int type;
|
57
|
+
# union {
|
58
|
+
# tamFile tamr;
|
59
|
+
# bamFile bam;
|
60
|
+
# FILE *tamw;
|
61
|
+
# } x;
|
62
|
+
# bam_header_t *header;
|
63
|
+
# } samfile_t;
|
64
|
+
# FFI mapping of the C samfile_t struct: a type tag, the file-handle union
# and a pointer to the header.
class SamfileT < FFI::Struct
  # :header is kept as a raw pointer rather than an embedded BamHeaderT
  # (the alternative `:header, BamHeaderT` was tried and left disabled).
  layout :type,   :int,
         :x,      SamfileTX,
         :header, :pointer
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
# High-level sam* entry points. :bam_pileup_f is a callback type that must
# already be registered on this module when sampileup is attached.
attach_function :samclose,   [:pointer],                                :void
attach_function :samread,    [:pointer, :pointer],                      :int
attach_function :samopen,    [:string, :string, :pointer],              :pointer
attach_function :samwrite,   [:pointer, :pointer],                      :int
attach_function :sampileup,  [:pointer, :int, :bam_pileup_f, :pointer], :int
attach_function :samfaipath, [:string],                                 :string
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
data/test/basictest.rb
ADDED
@@ -0,0 +1,308 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require "test/unit"
|
4
|
+
require "bio/db/sam"
|
5
|
+
require "bio/db/sam/sam"
|
6
|
+
|
7
|
+
|
8
|
+
class TestBioDbSam < Test::Unit::TestCase
|
9
|
+
|
10
|
+
#Set up the paths
|
11
|
+
# Points the fixture instance variables at the small sample data set.
# Paths are relative to the current working directory.
def setup
  @test_folder   = "test/samples/small"
  @testTAMFile   = "#{@test_folder}/test.tam"
  @testBAMFile   = "#{@test_folder}/testu.bam"
  @testReference = "#{@test_folder}/test_chr.fasta"
end
|
18
|
+
|
19
|
+
#Removing the index files
|
20
|
+
# Removes the index files the tests may have generated: the FASTA index
# (<reference>.fai) and the BAM index (<bam>.bai).
#
# The original code tried to delete "<bam>.fai" while reporting ".bai", so
# the BAM index was never removed. Deletion stays best-effort: a missing or
# undeletable file is ignored so teardown never fails a test.
def teardown
  [[@testReference, ".fai"], [@testBAMFile, ".bai"]].each do |base, extension|
    next if base.nil?
    index_path = base + extension
    begin
      File.delete(index_path)
      p "deleted: " + index_path + " "
    rescue SystemCallError
      # Nothing to clean up (file absent) or not removable; ignore.
    end
  end
end
|
32
|
+
|
33
|
+
# Sanity check: dumps the load path and records a trivially true assertion.
def default_test
  puts($LOAD_PATH)
  assert(true, "Unit test test")
end
|
37
|
+
|
38
|
+
# Opens and closes the text alignment file through the raw FFI bindings.
def test_openSAMFile
  tools = Bio::DB::SAM::Tools
  handle = tools.samopen(@testTAMFile, "r", nil)
  tools.samclose(handle)
  assert(true, "file open and closed")
end
|
43
|
+
|
44
|
+
# Constructing a Sam object with no options must raise SAMException.
def test_new_class_empty
  Bio::DB::Sam.new({})
  assert(false, "Should fail while opening without parameters")
rescue Bio::DB::SAMException => error
  puts error.message
  assert(true, error.message)
end
|
53
|
+
|
54
|
+
# Opening a BAM path that does not exist must raise SAMException.
def test_new_class_empty_invalid_path
  reader = Bio::DB::Sam.new({ :bam => "INVALID" })
  reader.open
  reader.close
  assert(false, "Should fail with an invalid path")
rescue Bio::DB::SAMException => error
  puts error.message
  assert(true, error.message)
end
|
65
|
+
|
66
|
+
# A text alignment file can be opened and closed without a FASTA index.
def test_class_text_read_no_faidx
  reader = Bio::DB::Sam.new({ :tam => @testTAMFile })
  reader.open
  reader.close
  assert(true, "file open and closed with the class")
end
|
72
|
+
|
73
|
+
# Opens a text alignment file, drops the only reference without closing it
# and forces a garbage collection; the test passes if nothing blows up.
def test_class_text_read_no_close
  reader = Bio::DB::Sam.new({ :tam => @testTAMFile })
  reader.open
  reader = nil
  GC.start

  assert(true, "file openend but not closed")
end
|
82
|
+
|
83
|
+
# Same as the text variant, for a BAM file: open without keeping a
# reference, then force a garbage collection.
def test_class_binary_read_no_close
  Bio::DB::Sam.new({ :bam => @testBAMFile }).open
  GC.start
  assert(true, "BINARY file openend but not closed")
end
|
89
|
+
|
90
|
+
# For every reference id listed in ids2.txt (first whitespace-separated
# token of each line): load the reference, fetch its first 16000 bases and
# then fetch the alignments over the length of the returned sequence,
# printing any that are found.
def test_read_coverage
  sam = Bio::DB::Sam.new({ :bam => @testBAMFile, :fasta => @testReference })
  sam.open
  File.open("#{@test_folder}/ids2.txt", "r") do |ids|
    puts "file opened"
    ids.each_line do |line|
      fetching = line.split(' ')[0]
      puts "fetching: " + fetching
      sam.load_reference
      seq = sam.fetch_reference(fetching, 0, 16000)
      als = sam.fetch(fetching, 0, seq.length)
      next unless als.length > 0
      p fetching
      p als
    end
  end
  sam.close
  assert(true, "Finish")
end
|
114
|
+
# def test_read_TAM_as_BAM
|
115
|
+
# begin
|
116
|
+
# sam = Bio::DB::Sam.new({:bam=>@testTAMFile})
|
117
|
+
# sam.open
|
118
|
+
# sam.close
|
119
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
120
|
+
# rescue Bio::DB::SAMException => e
|
121
|
+
# assert(true, "Properly handled")
|
122
|
+
# end
|
123
|
+
# end
|
124
|
+
|
125
|
+
# def test_read_BAM_as_TAM
|
126
|
+
# begin
|
127
|
+
# sam = Bio::DB::Sam.new({:tam=>@testBAMFile})
|
128
|
+
# sam.open
|
129
|
+
# sam.close
|
130
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
131
|
+
# rescue Bio::DB::SAMException => e
|
132
|
+
# assert(true, "Properly handled")
|
133
|
+
# end
|
134
|
+
# end
|
135
|
+
|
136
|
+
# Loading the index of a BAM file must complete without raising.
# (Related native bindings: bam_index_build, bam_index_load,
# bam_index_destroy.)
def test_bam_load_index
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  reader.load_index
  reader.close
  assert(true, "BAM index loaded")
end
|
146
|
+
|
147
|
+
# Loading an index for a text alignment file must raise SAMException.
def test_tam_load_index
  reader = Bio::DB::Sam.new({ :tam => @testTAMFile })
  reader.open
  reader.load_index
  reader.close
  assert(false, "TAM index loaded")
rescue Bio::DB::SAMException
  assert(true, "Unable to load an index for a TAM file")
end
|
158
|
+
|
159
|
+
# Fetches the alignments over chr_1:0-500 and prints them.
# node_7263 238 60 has 550+, query from 0 to 500, something shall come....
def test_read_segment
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  alignments = reader.fetch("chr_1", 0, 500)
  p alignments
  reader.close
  assert(true, "Seems it ran the query")
end
|
168
|
+
|
169
|
+
# Fetching from a reference name that is not in the BAM file ("Chr1")
# must raise SAMException.
def test_read_invalid_reference
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  begin
    alignments = reader.fetch("Chr1", 0, 500)
    p alignments
    reader.close
    assert(false, "Seems it ran the query")
  rescue Bio::DB::SAMException => error
    p error
    assert(true, "Exception generated and catched")
  end
end
|
182
|
+
|
183
|
+
# Fetching with a negative start coordinate must raise SAMException.
def test_read_invalid_reference_start_coordinate
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  begin
    alignments = reader.fetch("chr", -1, 500)
    p alignments
    reader.close
    assert(false, "Seems it ran the query")
  rescue Bio::DB::SAMException => error
    p error
    assert(true, "Exception generated and catched")
  end
end
|
196
|
+
|
197
|
+
# Fetching with an end coordinate of 50000 is expected to raise SAMException.
def test_read_invalid_reference_end_coordinate
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  begin
    alignments = reader.fetch("chr", 0, 50000)
    p alignments
    reader.close
    assert(false, "Seems it ran the query")
  rescue Bio::DB::SAMException => error
    p error
    assert(true, "Exception generated and catched")
  end
end
|
210
|
+
|
211
|
+
# Fetching with start greater than end (500, 0) must raise SAMException.
def test_read_invalid_reference_swaped_coordinates
  reader = Bio::DB::Sam.new({ :bam => @testBAMFile })
  reader.open
  begin
    alignments = reader.fetch("chr", 500, 0)
    p alignments
    reader.close
    assert(false, "Seems it ran the query")
  rescue Bio::DB::SAMException => error
    p error
    assert(true, "Exception generated and catched")
  end
end
|
224
|
+
|
225
|
+
# Loads the FASTA reference and fetches the first 500 bases of chr_1.
def test_fasta_load_index
  reader = Bio::DB::Sam.new({ :fasta => @testReference })
  reader.load_reference
  sequence = reader.fetch_reference("chr_1", 0, 500)
  p sequence
  reader.close
  assert(true, "The reference was loaded")
end
|
233
|
+
|
234
|
+
# Fetching a sequence for a reference name that is not in the FASTA file
# ("chr1") must raise SAMException.
#
# This method used to be named test_fasta_load_index, the same name as the
# positive test defined just before it. Ruby keeps only the last definition
# of a method, so the positive test was silently replaced and never ran.
# The distinct name lets both tests run.
def test_fasta_load_index_invalid_reference
  sam = Bio::DB::Sam.new({:fasta=>@testReference})
  sam.load_reference
  begin
    seq = sam.fetch_reference("chr1", 0, 500)
    p "Error seq:"+ seq
    sam.close
    assert(false, "The reference was loaded")
  rescue Bio::DB::SAMException => e
    p e
    assert(true, "The references was not loaded")
  end
end
|
247
|
+
|
248
|
+
# Loads the alignments over chr_1:0-500 as Feature objects and prints them.
def test_load_feature
  features = Feature.find_by_bam("chr_1", 0, 500, @testBAMFile)

  p features
  assert(true, "Loaded as features")
end
|
255
|
+
|
256
|
+
# average_coverage("chr_1", 60, 30) on the sample data is expected to be 3.
def test_avg_coverage
  reader = Bio::DB::Sam.new({ :fasta => @testReference, :bam => @testBAMFile })
  reader.open
  cov = reader.average_coverage("chr_1", 60, 30)
  p "Coverage: " + cov.to_s
  reader.close
  assert(true, "Average coverage ran")
  assert(3 == cov, "The coverage is 3")
end
|
265
|
+
|
266
|
+
|
267
|
+
# Prints the values returned by chromosome_coverage("chr_1", 0, 60) as a
# POS/COV table. The values themselves are not asserted.
def test_chromosome_coverage
  reader = Bio::DB::Sam.new({ :fasta => @testReference, :bam => @testBAMFile })
  reader.open
  covs = reader.chromosome_coverage("chr_1", 0, 60)
  p "Coverage: "
  p covs
  puts "POS\tCOV"
  covs.each_with_index do |cov, i|
    puts "#{i}\t#{cov}"
  end
  reader.close
  assert(true, "Average coverage ran")
  #assert(3 == cov, "The coverage is 3")
end
|
279
|
+
|
280
|
+
end
|
281
|
+
|
282
|
+
# Minimal value object used by the tests to show how alignments fetched from
# a BAM file can be turned into application-level features.
class Feature
  attr_reader :start, :end, :strand, :sequence, :quality

  # Builds a feature from an options hash with the keys :start, :end,
  # :strand, :sequence and :quality; missing keys leave the attribute nil.
  #
  # The original code read a[:enf] (a typo for :end), so #end was always nil
  # even though find_by_bam passes :end.
  def initialize(a={})
    p a # debug trace of the incoming attributes, kept from the original
    @start = a[:start]
    @end = a[:end]
    @strand = a[:strand]
    @sequence = a[:sequence]
    @quality = a[:quality]
  end

  # Returns an Array with one Feature per alignment that the BAM file at
  # +bam_file_path+ yields for +reference+ between +start+ and +stop+.
  def self.find_by_bam(reference,start,stop,bam_file_path)

    sam = Bio::DB::Sam.new({:bam=>bam_file_path})
    features = []
    sam.open

    # Callback handed to fetch_with_function; it appends one Feature per
    # alignment it receives.
    fetchAlignment = Proc.new do |a|
      strand = a.query_strand ? '+' : '-'
      features << Feature.new({:start=>a.pos,:end=>a.calend,:strand=>strand,:sequence=>a.seq,:quality=>a.qual})
    end
    sam.fetch_with_function(reference, start, stop, fetchAlignment)
    sam.close
    features
  end
end
|