bio-samtools 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +24 -0
- data/LICENSE.txt +702 -0
- data/README.rdoc +85 -0
- data/Rakefile +59 -0
- data/VERSION +1 -0
- data/bio-samtools.gemspec +105 -0
- data/ext/mkrf_conf.rb +65 -0
- data/lib/bio-samtools.rb +2 -0
- data/lib/bio/.DS_Store +0 -0
- data/lib/bio/db/sam.rb +325 -0
- data/lib/bio/db/sam/bam.rb +210 -0
- data/lib/bio/db/sam/external/COPYING +21 -0
- data/lib/bio/db/sam/external/VERSION +1 -0
- data/lib/bio/db/sam/faidx.rb +21 -0
- data/lib/bio/db/sam/library.rb +25 -0
- data/lib/bio/db/sam/sam.rb +84 -0
- data/test/basictest.rb +308 -0
- data/test/coverage.rb +26 -0
- data/test/coverage_plot.rb +28 -0
- data/test/feature.rb +0 -0
- data/test/helper.rb +18 -0
- data/test/samples/small/ids2.txt +1 -0
- data/test/samples/small/sorted.bam +0 -0
- data/test/samples/small/test +0 -0
- data/test/samples/small/test.bam +0 -0
- data/test/samples/small/test.fa +20 -0
- data/test/samples/small/test.fai +0 -0
- data/test/samples/small/test.sai +0 -0
- data/test/samples/small/test.tam +10 -0
- data/test/samples/small/test_chr.fasta +1000 -0
- data/test/samples/small/test_chr.fasta.amb +2 -0
- data/test/samples/small/test_chr.fasta.ann +3 -0
- data/test/samples/small/test_chr.fasta.bwt +0 -0
- data/test/samples/small/test_chr.fasta.fai +1 -0
- data/test/samples/small/test_chr.fasta.pac +0 -0
- data/test/samples/small/test_chr.fasta.rbwt +0 -0
- data/test/samples/small/test_chr.fasta.rpac +0 -0
- data/test/samples/small/test_chr.fasta.rsa +0 -0
- data/test/samples/small/test_chr.fasta.sa +0 -0
- data/test/samples/small/testu.bam +0 -0
- data/test/samples/small/testu.bam.bai +0 -0
- data/test/test_bio-samtools.rb +7 -0
- metadata +185 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
# require 'rubygems'
|
2
|
+
# require'ffi'
|
3
|
+
# require 'bio/db/sam/faidx'
|
4
|
+
# require 'bio/db/sam/sam'
|
5
|
+
module Bio
|
6
|
+
class DB
|
7
|
+
module SAM
|
8
|
+
module Tools
|
9
|
+
extend FFI::Library
|
10
|
+
|
11
|
+
#ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.dylib')}"
|
12
|
+
ffi_lib Bio::DB::SAM::Library.filename
|
13
|
+
|
14
|
+
BAM_FPAIRED = 1
|
15
|
+
BAM_FPROPER_PAIR = 2
|
16
|
+
BAM_FUNMAP = 4
|
17
|
+
BAM_FMUNMAP = 8
|
18
|
+
BAM_FREVERSE = 16
|
19
|
+
BAM_FMREVERSE = 32
|
20
|
+
BAM_FREAD1 = 64
|
21
|
+
BAM_FREAD2 = 128
|
22
|
+
BAM_FSECONDARY = 256
|
23
|
+
BAM_FQCFAIL = 512
|
24
|
+
BAM_FDUP = 1024
|
25
|
+
BAM_OFDEC = 0
|
26
|
+
BAM_OFHEX = 1
|
27
|
+
BAM_OFSTR = 2
|
28
|
+
BAM_DEF_MASK = (4|256|512|1024)
|
29
|
+
BAM_CIGAR_SHIFT = 4
|
30
|
+
BAM_CIGAR_MASK = ((1 << 4) -1)
|
31
|
+
BAM_CMATCH = 0
|
32
|
+
BAM_CINS = 1
|
33
|
+
BAM_CDEL = 2
|
34
|
+
BAM_CREF_SKIP = 3
|
35
|
+
BAM_CSOFT_CLIP = 4
|
36
|
+
BAM_CHARD_CLIP = 5
|
37
|
+
BAM_CPAD = 6
|
38
|
+
class Bam1CoreT < FFI::Struct
|
39
|
+
#uint32_t bin:16, qual:8, l_qname:8;
|
40
|
+
#uint32_t flag:16, n_cigar:16;
|
41
|
+
layout(
|
42
|
+
:tid, :int32_t,
|
43
|
+
:pos, :int32_t,
|
44
|
+
:bin, :uint16,
|
45
|
+
:qual, :uint8,
|
46
|
+
:l_qname, :uint8,
|
47
|
+
:flag, :uint16,
|
48
|
+
:n_cigar, :uint16,
|
49
|
+
:l_qseq, :int32_t,
|
50
|
+
:mtid, :int32_t,
|
51
|
+
:mpos, :int32_t,
|
52
|
+
:isize, :int32_t
|
53
|
+
)
|
54
|
+
end
|
55
|
+
class Bam1T < FFI::Struct
|
56
|
+
layout(
|
57
|
+
:core, Bam1CoreT,
|
58
|
+
:l_aux, :int,
|
59
|
+
:data_len, :int,
|
60
|
+
:m_data, :int,
|
61
|
+
:data, :pointer
|
62
|
+
)
|
63
|
+
def qname
|
64
|
+
#bam1_qname(b) ((char*)((b)->data))
|
65
|
+
data = self[:data]
|
66
|
+
data.read_string()
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
Bam_NT16_Rev_Table = "=ACMGRSVTWYHKDBN"
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
attach_function :sam_open, [ :string ], :pointer
|
77
|
+
attach_function :sam_close, [ :pointer ], :void
|
78
|
+
attach_function :sam_read1, [ :pointer, :pointer, :pointer ], :int
|
79
|
+
attach_function :sam_header_read2, [ :string ], :pointer
|
80
|
+
attach_function :sam_header_read, [ :pointer ], :pointer
|
81
|
+
attach_function :sam_header_parse, [ :pointer ], :int
|
82
|
+
#attach_function :sam_header_parse_rg, [ :pointer ], :int This is declared in the .h file, but is not implemented
|
83
|
+
#attach_function :bam_strmap_put, [ :pointer, :string, :string ], :int
|
84
|
+
#attach_function :bam_strmap_get, [ :pointer, :string ], :string
|
85
|
+
#attach_function :bam_strmap_dup, [ :pointer ], :pointer
|
86
|
+
#attach_function :bam_strmap_init, [ ], :pointer
|
87
|
+
#attach_function :bam_strmap_destroy, [ :pointer ], :void
|
88
|
+
attach_function :bam_header_init, [ ], :pointer
|
89
|
+
attach_function :bam_header_destroy, [ :pointer ], :void
|
90
|
+
attach_function :bam_header_read, [ :pointer ], :pointer
|
91
|
+
attach_function :bam_header_write, [ :pointer, :pointer ], :int
|
92
|
+
attach_function :bam_read1, [ :pointer, :pointer ], :int
|
93
|
+
attach_function :bam_write1_core, [ :pointer, :pointer, :int, :pointer ], :int
|
94
|
+
attach_function :bam_write1, [ :pointer, :pointer ], :int
|
95
|
+
attach_function :bam_format1, [ :pointer, :pointer ], :string
|
96
|
+
attach_function :bam_format1_core, [ :pointer, :pointer, :int ], :string
|
97
|
+
attach_function :bam_get_library, [ :pointer, :pointer ], :string
|
98
|
+
class BamPileup1T < FFI::Struct
|
99
|
+
layout(
|
100
|
+
:b, :pointer,
|
101
|
+
:qpos, :int32_t,
|
102
|
+
:indel, :int,
|
103
|
+
:level, :int,
|
104
|
+
:is_del, :uint32,
|
105
|
+
:is_head, :uint32,
|
106
|
+
:is_tail, :uint32
|
107
|
+
)
|
108
|
+
end
|
109
|
+
attach_function :bam_plbuf_set_mask, [ :pointer, :int ], :void
|
110
|
+
callback :bam_pileup_f, [ :uint32, :uint32, :int, :pointer, :pointer ], :int
|
111
|
+
attach_function :bam_plbuf_reset, [ :pointer ], :void
|
112
|
+
attach_function :bam_plbuf_init, [ :bam_pileup_f, :pointer ], :pointer
|
113
|
+
attach_function :bam_plbuf_destroy, [ :pointer ], :void
|
114
|
+
attach_function :bam_plbuf_push, [ :pointer, :pointer ], :int
|
115
|
+
attach_function :bam_pileup_file, [ :pointer, :int, :bam_pileup_f, :pointer ], :int
|
116
|
+
attach_function :bam_lplbuf_reset, [ :pointer ], :void
|
117
|
+
attach_function :bam_lplbuf_init, [ :bam_pileup_f, :pointer ], :pointer
|
118
|
+
attach_function :bam_lplbuf_destroy, [ :pointer ], :void
|
119
|
+
attach_function :bam_lplbuf_push, [ :pointer, :pointer ], :int
|
120
|
+
attach_function :bam_index_build, [ :string ], :int
|
121
|
+
attach_function :bam_index_load, [ :string ], :pointer
|
122
|
+
attach_function :bam_index_destroy, [ :pointer ], :void
|
123
|
+
|
124
|
+
#The function for fetching stuff
|
125
|
+
#typedef int ( *bam_fetch_f)(
|
126
|
+
# const bam1_t *b,
|
127
|
+
# void *data);
|
128
|
+
callback(:bam_fetch_f, [ :pointer, :pointer ], :int)
|
129
|
+
attach_function :bam_fetch, [ :pointer, :pointer, :int, :int, :int, :pointer, :bam_fetch_f ], :int
|
130
|
+
attach_function :bam_parse_region, [ :pointer, :pointer, :pointer, :pointer, :pointer ], :int
|
131
|
+
|
132
|
+
#The second parameter must be only 2 characters long
|
133
|
+
attach_function :bam_aux_get, [ :pointer, :string], :pointer
|
134
|
+
attach_function :bam_aux2i, [ :pointer ], :int32_t
|
135
|
+
attach_function :bam_aux2f, [ :pointer ], :float
|
136
|
+
attach_function :bam_aux2d, [ :pointer ], :double
|
137
|
+
attach_function :bam_aux2A, [ :pointer ], :char
|
138
|
+
attach_function :bam_aux2Z, [ :pointer ], :string
|
139
|
+
attach_function :bam_aux_del, [ :pointer, :pointer ], :int
|
140
|
+
#The second parameter must be only 2 characters long
|
141
|
+
attach_function :bam_aux_append, [ :pointer, :string, :char, :int, :pointer ], :void
|
142
|
+
#The second parameter must be only 2 characters long
|
143
|
+
attach_function :bam_aux_get_core, [ :pointer,:string ], :pointer
|
144
|
+
attach_function :bam_calend, [ :pointer, :pointer ], :uint32
|
145
|
+
attach_function :bam_cigar2qlen, [ :pointer, :pointer ], :int32_t
|
146
|
+
|
147
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
148
|
+
#the static inline.
|
149
|
+
#attach_function :bam_reg2bin, [ :uint32, :uint32 ], :int
|
150
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
151
|
+
#the static inline.
|
152
|
+
#attach_function :bam_copy1, [ :pointer, :pointer ], :pointer
|
153
|
+
#FIXME: if we see that we need this function, implement it on ruby, seems like FFI is having problems with
|
154
|
+
#the static inline.
|
155
|
+
#attach_function :bam_dup1, [ :pointer ], :pointer
|
156
|
+
|
157
|
+
|
158
|
+
#bam sort
|
159
|
+
# @abstract Sort an unsorted BAM file based on the chromosome order
|
160
|
+
# and the leftmost position of an alignment
|
161
|
+
#
|
162
|
+
# @param is_by_qname whether to sort by query name
|
163
|
+
# @param fn name of the file to be sorted
|
164
|
+
# @param prefix prefix of the output and the temporary files; upon
|
165
|
+
# success, prefix.bam will be written.
|
166
|
+
# @param max_mem approximate maximum memory (very inaccurate)
|
167
|
+
#
|
168
|
+
# @discussion It may create multiple temporary subalignment files
|
169
|
+
# and then merge them by calling bam_merge_core(). This function is
|
170
|
+
# NOT thread safe.
|
171
|
+
attach_function :bam_sort_core, [:int, :string, :string, :int], :void
|
172
|
+
# Convenience wrapper around bam_sort_core.
#
# bam_filename      - name of the BAM file to be sorted.
# bam_output_prefix - prefix of the output file handed to bam_sort_core.
# is_by_name        - forwarded as the first argument of bam_sort_core
#                     (0 by default, i.e. the value this wrapper always used).
# max_mem           - forwarded as the last argument of bam_sort_core
#                     (500000000 by default, the value this wrapper always used).
#
# Returns whatever bam_sort_core returns.
def self.bam_sort(bam_filename, bam_output_prefix, is_by_name = 0, max_mem = 500000000)
  bam_sort_core(is_by_name, bam_filename, bam_output_prefix, max_mem)
end
|
177
|
+
|
178
|
+
# @abstract Merge multiple sorted BAM.
|
179
|
+
# @param is_by_qname whether to sort by query name
|
180
|
+
# @param out output BAM file name
|
181
|
+
# @param headers name of SAM file from which to copy '@' header lines,
|
182
|
+
# or NULL to copy them from the first file to be merged
|
183
|
+
# @param n number of files to be merged
|
184
|
+
# @param fn names of files to be merged
|
185
|
+
#
|
186
|
+
# @discussion Padding information may NOT be correctly maintained. This
|
187
|
+
# function is NOT thread safe.
|
188
|
+
# int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn,
|
189
|
+
# int flag, const char *reg)
|
190
|
+
|
191
|
+
# attach_function :bam_merge_core, [:int, :string, :string, :int, :pointer, :int, :string], :int
|
192
|
+
# def self.bam_merge(bam_output_file_name, bam_array_input_file_names, rg)
|
193
|
+
# is_by_qname = 0
|
194
|
+
# headers = ""
|
195
|
+
# flag = 0
|
196
|
+
# ary = bam_array_input_file_names.map do |filename|
|
197
|
+
# FFI::MemoryPointer.from_string(filename)
|
198
|
+
# end
|
199
|
+
# ary << nil
|
200
|
+
# fns=FFI::MemoryPointer.new :pointer, ary.size
|
201
|
+
# ary.each_with_index do |p_filename, idx|
|
202
|
+
# fns[idx].put_pointer(0, p_filename)
|
203
|
+
# end
|
204
|
+
#
|
205
|
+
# bam_merge_core(is_by_qname, bam_output_file_name, headers, bam_array_input_file_names.size, fns, flag, rg)
|
206
|
+
# end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008-2009 Genome Research Ltd.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
0.1.16
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#require 'rubygems'
|
2
|
+
#require'ffi'
|
3
|
+
#require 'bio/db/sam/bam'
|
4
|
+
module Bio
|
5
|
+
class DB
|
6
|
+
module SAM
|
7
|
+
module Tools
|
8
|
+
extend FFI::Library
|
9
|
+
#ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.dylib')}"
|
10
|
+
ffi_lib Bio::DB::SAM::Library.filename
|
11
|
+
|
12
|
+
attach_function :fai_build, [ :string ], :int
|
13
|
+
attach_function :fai_destroy, [ :pointer ], :void
|
14
|
+
attach_function :fai_load, [ :string ], :pointer
|
15
|
+
attach_function :fai_fetch, [ :pointer, :string, :pointer ], :string
|
16
|
+
attach_function :faidx_fetch_nseq, [ :pointer ], :int
|
17
|
+
attach_function :faidx_fetch_seq, [ :pointer, :string, :int, :int, :pointer ], :string
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Bio
  class DB
    module SAM
      # Resolves the location of the bundled libbam shared library.
      #
      # IMPORTANT NOTE: Windows library is missing in this distribution
      module Library
        # Return the path with the file name of the library for the specific
        # operating system.
        #
        # platform - platform identifier used to pick the library extension;
        #            defaults to RUBY_PLATFORM.
        #
        # Returns the absolute path <directory of this file>/external/libbam.<ext>.
        # Raises LoadError when the platform is not one of the recognised ones,
        # instead of returning a path ending in a bare "libbam.".
        def filename(platform = RUBY_PLATFORM)
          #TODO refactor this piece of code in all the files
          lib_os = case platform
                   when /linux/
                     'so.1'
                   when /darwin/
                     '1.dylib'
                   # Windows builds of Ruby report mswin/mingw (or cygwin), never
                   # the literal "windows"; the old pattern is kept for safety.
                   when /mswin|mingw|cygwin|windows/
                     'dll'
                   else
                     raise LoadError, "bio-samtools: no libbam available for platform '#{platform}'"
                   end

          File.join(File.expand_path(File.dirname(__FILE__)), 'external', "libbam.#{lib_os}")
        end #filename
        module_function :filename
      end #Library
    end #Sam
  end #DB
end #Bio
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# sam.rb
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Created by Ricardo Ramirez on 3/25/10.
|
6
|
+
#
|
7
|
+
# require 'rubygems'
|
8
|
+
# require'ffi'
|
9
|
+
# require 'bio/db/sam/bam'
|
10
|
+
module Bio
|
11
|
+
class DB
|
12
|
+
module SAM
|
13
|
+
module Tools
|
14
|
+
extend FFI::Library
|
15
|
+
|
16
|
+
# ffi_lib "#{File.join(File.expand_path(File.dirname(__FILE__)),'external','libbam.#{lib_os}')}"
|
17
|
+
ffi_lib Bio::DB::SAM::Library.filename
|
18
|
+
|
19
|
+
# typedef struct {
|
20
|
+
# int32_t n_targets;
|
21
|
+
# char **target_name;
|
22
|
+
# uint32_t *target_len;
|
23
|
+
# void *dict, *hash, *rg2lib;
|
24
|
+
# int l_text;
|
25
|
+
# char *text;
|
26
|
+
# } bam_header_t;
|
27
|
+
# FFI mapping of the C bam_header_t struct; field order follows the
# typedef quoted in the comment preceding this class.
class BamHeaderT < FFI::Struct
  layout(
    :n_targets, :int32_t,
    :target_name, :pointer,
    :target_len, :pointer,
    :dict, :pointer,
    :hash, :pointer,
    :rg2lib, :pointer,
    :l_text, :int,
    :text, :pointer
  )

  # Copies +str+ into a new native buffer and points the :text field at it.
  # The buffer is also kept in @text (presumably so it is not garbage
  # collected while the struct still references it).
  # NOTE(review): :l_text is not updated here - confirm whether callers rely on it.
  def text=(str)
    @text = FFI::MemoryPointer.from_string(str)
    self[:text] = @text
  end

  # Returns the header text as a String, read from the :text field itself,
  # so it also works for headers that were filled in by the C library and
  # never went through #text= (previously that case raised NoMethodError on
  # a nil @text). Returns nil when the field is a NULL pointer.
  def text
    text_ptr = self[:text]
    text_ptr.null? ? nil : text_ptr.get_string(0)
  end
end
|
47
|
+
|
48
|
+
class SamfileTX < FFI::Union
|
49
|
+
layout(
|
50
|
+
:tamr, :pointer, #Text file, read.
|
51
|
+
:bam, :pointer, #bamFile,
|
52
|
+
:tamw, :pointer #Text file, write.
|
53
|
+
)
|
54
|
+
end
|
55
|
+
# typedef struct {
|
56
|
+
# int type;
|
57
|
+
# union {
|
58
|
+
# tamFile tamr;
|
59
|
+
# bamFile bam;
|
60
|
+
# FILE *tamw;
|
61
|
+
# } x;
|
62
|
+
# bam_header_t *header;
|
63
|
+
# } samfile_t;
|
64
|
+
class SamfileT < FFI::Struct
|
65
|
+
layout(
|
66
|
+
:type, :int,
|
67
|
+
:x, SamfileTX,
|
68
|
+
:header, :pointer
|
69
|
+
#:header, BamHeaderT
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
attach_function :samclose, [ :pointer ], :void
|
76
|
+
attach_function :samread, [ :pointer, :pointer ], :int
|
77
|
+
attach_function :samopen, [ :string, :string, :pointer ], :pointer
|
78
|
+
attach_function :samwrite, [ :pointer, :pointer ], :int
|
79
|
+
attach_function :sampileup, [ :pointer, :int, :bam_pileup_f, :pointer ], :int
|
80
|
+
attach_function :samfaipath, [ :string ], :string
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
data/test/basictest.rb
ADDED
@@ -0,0 +1,308 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
require "test/unit"
|
4
|
+
require "bio/db/sam"
|
5
|
+
require "bio/db/sam/sam"
|
6
|
+
|
7
|
+
|
8
|
+
class TestBioDbSam < Test::Unit::TestCase
|
9
|
+
|
10
|
+
#Set up the paths
|
11
|
+
def setup
|
12
|
+
@test_folder = "test/samples/small"
|
13
|
+
@testTAMFile = @test_folder + "/test.tam"
|
14
|
+
@testBAMFile = @test_folder + "/testu.bam"
|
15
|
+
@testReference = @test_folder + "/test_chr.fasta"
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
#Removing the index files
|
20
|
+
# Removes the index files the tests may have generated: the FASTA index
# (<reference>.fai) and the BAM index (<bam>.bai). The original code tried
# to delete <bam>.fai while reporting <bam>.bai, so the BAM index was never
# actually removed.
def teardown
  [[@testReference, ".fai"], [@testBAMFile, ".bai"]].each do |base, extension|
    index_path = base + extension
    begin
      File.delete(index_path)
      p "deleted: " + index_path + " "
    rescue SystemCallError
      # Best effort: the index may not exist (or may not be removable).
    end
  end
end
|
32
|
+
|
33
|
+
def default_test
|
34
|
+
puts $LOAD_PATH
|
35
|
+
assert(true, "Unit test test")
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_openSAMFile
|
39
|
+
bamfile = Bio::DB::SAM::Tools.samopen(@testTAMFile,"r",nil)
|
40
|
+
Bio::DB::SAM::Tools.samclose(bamfile)
|
41
|
+
assert(true, "file open and closed")
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_new_class_empty
|
45
|
+
begin
|
46
|
+
bam = Bio::DB::Sam.new({})
|
47
|
+
assert(false, "Should fail while opening without parameters")
|
48
|
+
rescue Bio::DB::SAMException => e
|
49
|
+
puts e.message
|
50
|
+
assert(true, e.message)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_new_class_empty_invalid_path
|
55
|
+
begin
|
56
|
+
sam = Bio::DB::Sam.new({:bam=>"INVALID"})
|
57
|
+
sam.open
|
58
|
+
sam.close
|
59
|
+
assert(false, "Should fail with an invalid path")
|
60
|
+
rescue Bio::DB::SAMException => e
|
61
|
+
puts e.message
|
62
|
+
assert(true, e.message)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_class_text_read_no_faidx
|
67
|
+
sam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
68
|
+
sam.open
|
69
|
+
sam.close
|
70
|
+
assert(true, "file open and closed with the class")
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_class_text_read_no_close
|
74
|
+
|
75
|
+
fam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
76
|
+
fam.open
|
77
|
+
fam = nil
|
78
|
+
ObjectSpace.garbage_collect
|
79
|
+
|
80
|
+
assert(true, "file openend but not closed")
|
81
|
+
end
|
82
|
+
|
83
|
+
def test_class_binary_read_no_close
|
84
|
+
|
85
|
+
Bio::DB::Sam.new({:bam=>@testBAMFile}).open
|
86
|
+
ObjectSpace.garbage_collect
|
87
|
+
assert(true, "BINARY file openend but not closed")
|
88
|
+
end
|
89
|
+
|
90
|
+
def test_read_coverage
|
91
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile, :fasta=>@testReference})
|
92
|
+
sam.open
|
93
|
+
File.open( @test_folder +"/ids2.txt", "r") do |file|
|
94
|
+
puts "file opened"
|
95
|
+
file.each_line{|line|
|
96
|
+
fetching = line.split(' ')[0]
|
97
|
+
puts "fetching: " + fetching
|
98
|
+
sam.load_reference
|
99
|
+
seq = sam.fetch_reference(fetching, 0, 16000)
|
100
|
+
# puts seq
|
101
|
+
# puts seq.length
|
102
|
+
als = sam.fetch(fetching, 0, seq.length)
|
103
|
+
# p als
|
104
|
+
if als.length() > 0 then
|
105
|
+
p fetching
|
106
|
+
p als
|
107
|
+
end
|
108
|
+
}
|
109
|
+
|
110
|
+
end
|
111
|
+
sam.close
|
112
|
+
assert(true, "Finish")
|
113
|
+
end
|
114
|
+
# def test_read_TAM_as_BAM
|
115
|
+
# begin
|
116
|
+
# sam = Bio::DB::Sam.new({:bam=>@testTAMFile})
|
117
|
+
# sam.open
|
118
|
+
# sam.close
|
119
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
120
|
+
# rescue Bio::DB::SAMException => e
|
121
|
+
# assert(true, "Properly handled")
|
122
|
+
# end
|
123
|
+
# end
|
124
|
+
|
125
|
+
# def test_read_BAM_as_TAM
|
126
|
+
# begin
|
127
|
+
# sam = Bio::DB::Sam.new({:tam=>@testBAMFile})
|
128
|
+
# sam.open
|
129
|
+
# sam.close
|
130
|
+
# assert(false, "Should raise an exception for reading a BAM as TAM")
|
131
|
+
# rescue Bio::DB::SAMException => e
|
132
|
+
# assert(true, "Properly handled")
|
133
|
+
# end
|
134
|
+
# end
|
135
|
+
|
136
|
+
def test_bam_load_index
|
137
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
138
|
+
sam.open
|
139
|
+
index = sam.load_index
|
140
|
+
sam.close
|
141
|
+
assert(true, "BAM index loaded")
|
142
|
+
# attach_function :bam_index_build, [ :string ], :int
|
143
|
+
# attach_function :bam_index_load, [ :string ], :pointer
|
144
|
+
# attach_function :bam_index_destroy, [ :pointer ], :void
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_tam_load_index
|
148
|
+
begin
|
149
|
+
sam = Bio::DB::Sam.new({:tam=>@testTAMFile})
|
150
|
+
sam.open
|
151
|
+
sam.load_index
|
152
|
+
sam.close
|
153
|
+
assert(false, "TAM index loaded")
|
154
|
+
rescue Bio::DB::SAMException => e
|
155
|
+
assert(true, "Unable to load an index for a TAM file")
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_read_segment
|
160
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
161
|
+
sam.open
|
162
|
+
als = sam.fetch("chr_1", 0, 500)
|
163
|
+
p als
|
164
|
+
sam.close
|
165
|
+
assert(true, "Seems it ran the query")
|
166
|
+
#node_7263 238 60 has 550+, query from 0 to 500, something shall come....
|
167
|
+
end
|
168
|
+
|
169
|
+
def test_read_invalid_reference
|
170
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
171
|
+
sam.open
|
172
|
+
begin
|
173
|
+
als = sam.fetch("Chr1", 0, 500)
|
174
|
+
p als
|
175
|
+
sam.close
|
176
|
+
assert(false, "Seems it ran the query")
|
177
|
+
rescue Bio::DB::SAMException => e
|
178
|
+
p e
|
179
|
+
assert(true, "Exception generated and catched")
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def test_read_invalid_reference_start_coordinate
|
184
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
185
|
+
sam.open
|
186
|
+
begin
|
187
|
+
als = sam.fetch("chr", -1, 500)
|
188
|
+
p als
|
189
|
+
sam.close
|
190
|
+
assert(false, "Seems it ran the query")
|
191
|
+
rescue Bio::DB::SAMException => e
|
192
|
+
p e
|
193
|
+
assert(true, "Exception generated and catched")
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
def test_read_invalid_reference_end_coordinate
|
198
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
199
|
+
sam.open
|
200
|
+
begin
|
201
|
+
als = sam.fetch("chr", 0, 50000)
|
202
|
+
p als
|
203
|
+
sam.close
|
204
|
+
assert(false, "Seems it ran the query")
|
205
|
+
rescue Bio::DB::SAMException => e
|
206
|
+
p e
|
207
|
+
assert(true, "Exception generated and catched")
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_read_invalid_reference_swaped_coordinates
|
212
|
+
sam = Bio::DB::Sam.new({:bam=>@testBAMFile})
|
213
|
+
sam.open
|
214
|
+
begin
|
215
|
+
als = sam.fetch("chr", 500, 0)
|
216
|
+
p als
|
217
|
+
sam.close
|
218
|
+
assert(false, "Seems it ran the query")
|
219
|
+
rescue Bio::DB::SAMException => e
|
220
|
+
p e
|
221
|
+
assert(true, "Exception generated and catched")
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
def test_fasta_load_index
|
226
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference})
|
227
|
+
sam.load_reference
|
228
|
+
seq = sam.fetch_reference("chr_1", 0, 500)
|
229
|
+
p seq
|
230
|
+
sam.close
|
231
|
+
assert(true, "The reference was loaded")
|
232
|
+
end
|
233
|
+
|
234
|
+
# Fetching a reference name that is not in the FASTA ("chr1") is expected
# to raise Bio::DB::SAMException.
#
# Renamed from test_fasta_load_index: it shared its name with the test
# defined just before it, so this definition replaced that one and the
# positive test was never run.
def test_fasta_load_index_invalid_reference
  sam = Bio::DB::Sam.new({:fasta=>@testReference})
  sam.load_reference
  begin
    seq = sam.fetch_reference("chr1", 0, 500)
    # Interpolate so that a nil result reaches the failing assertion below
    # instead of raising a TypeError on String#+.
    p "Error seq:#{seq}"
    sam.close
    assert(false, "The reference was loaded")
  rescue Bio::DB::SAMException => e
    p e
    assert(true, "The references was not loaded")
  end
end
|
247
|
+
|
248
|
+
def test_load_feature
|
249
|
+
|
250
|
+
fs = Feature.find_by_bam("chr_1", 0, 500,@testBAMFile)
|
251
|
+
|
252
|
+
p fs
|
253
|
+
assert(true, "Loaded as features")
|
254
|
+
end
|
255
|
+
|
256
|
+
def test_avg_coverage
|
257
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference, :bam=>@testBAMFile })
|
258
|
+
sam.open
|
259
|
+
cov = sam.average_coverage("chr_1", 60, 30)
|
260
|
+
p "Coverage: " + cov.to_s
|
261
|
+
sam.close
|
262
|
+
assert(true, "Average coverage ran")
|
263
|
+
assert(3 == cov, "The coverage is 3")
|
264
|
+
end
|
265
|
+
|
266
|
+
|
267
|
+
def test_chromosome_coverage
|
268
|
+
sam = Bio::DB::Sam.new({:fasta=>@testReference, :bam=>@testBAMFile })
|
269
|
+
sam.open
|
270
|
+
covs = sam.chromosome_coverage("chr_1", 0, 60)
|
271
|
+
p "Coverage: "
|
272
|
+
p covs
|
273
|
+
puts "POS\tCOV"
|
274
|
+
covs.each_with_index{ |cov, i| puts "#{i}\t#{cov}" }
|
275
|
+
sam.close
|
276
|
+
assert(true, "Average coverage ran")
|
277
|
+
#assert(3 == cov, "The coverage is 3")
|
278
|
+
end
|
279
|
+
|
280
|
+
end
|
281
|
+
|
282
|
+
# Minimal value object used by the tests to hold one alignment as a feature.
class Feature
  attr_reader :start, :end, :strand, :sequence, :quality

  # a - Hash with the optional keys :start, :end, :strand, :sequence and
  #     :quality; missing keys leave the matching attribute nil.
  def initialize(a={})
    p a
    @start = a[:start]
    @end = a[:end] # was a[:enf], which left #end nil for every feature
    @strand = a[:strand]
    @sequence = a[:sequence]
    @quality = a[:quality]
  end

  # Opens the BAM file at +bam_file_path+, passes a collecting Proc to
  # Sam#fetch_with_function for the region reference:start-stop, closes the
  # file and returns the Array of Feature objects that the Proc gathered.
  def self.find_by_bam(reference,start,stop,bam_file_path)
    sam = Bio::DB::Sam.new({:bam=>bam_file_path})
    features = []
    sam.open

    fetchAlignment = Proc.new do |a|
      strand = a.query_strand ? '+' : '-'
      features << Feature.new({:start=>a.pos,:end=>a.calend,:strand=>strand,:sequence=>a.seq,:quality=>a.qual})
    end
    sam.fetch_with_function(reference, start, stop, fetchAlignment)
    sam.close
    features
  end
end
|