htslib 0.2.0 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -6
- data/lib/hts/bam/{aux.rb → auxi.rb} +6 -0
- data/lib/hts/bam/record.rb +1 -1
- data/lib/hts/bam.rb +46 -14
- data/lib/hts/bcf/header_record.rb +11 -0
- data/lib/hts/bcf/record.rb +1 -1
- data/lib/hts/bcf.rb +84 -11
- data/lib/hts/faidx.rb +40 -9
- data/lib/hts/hts.rb +11 -6
- data/lib/hts/libhts/constants.rb +20 -19
- data/lib/hts/libhts/cram.rb +64 -0
- data/lib/hts/libhts/sam.rb +12 -0
- data/lib/hts/libhts/sam_funcs.rb +60 -5
- data/lib/hts/libhts/tbx.rb +1 -1
- data/lib/hts/libhts/tbx_funcs.rb +25 -0
- data/lib/hts/libhts/vcf.rb +114 -5
- data/lib/hts/libhts/vcf_funcs.rb +81 -3
- data/lib/hts/libhts.rb +2 -1
- data/lib/hts/tbx.rb +37 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +15 -1
- metadata +7 -4
data/lib/hts/libhts/cram.rb
CHANGED
@@ -7,6 +7,8 @@ module HTS
|
|
7
7
|
typedef :pointer, :cram_block
|
8
8
|
typedef :pointer, :cram_metrics
|
9
9
|
|
10
|
+
# cram_fd
|
11
|
+
|
10
12
|
attach_function \
|
11
13
|
:cram_fd_get_header,
|
12
14
|
[:cram_fd],
|
@@ -82,6 +84,8 @@ module HTS
|
|
82
84
|
[:cram_fd],
|
83
85
|
:int
|
84
86
|
|
87
|
+
# cram_block
|
88
|
+
|
85
89
|
attach_function \
|
86
90
|
:cram_block_get_content_id,
|
87
91
|
[:cram_block],
|
@@ -157,46 +161,88 @@ module HTS
|
|
157
161
|
%i[cram_block size_t],
|
158
162
|
:void
|
159
163
|
|
164
|
+
# Computes the size of a cram block, including the block header itself.
|
160
165
|
attach_function \
|
161
166
|
:cram_block_size,
|
162
167
|
[:cram_block],
|
163
168
|
:uint32
|
164
169
|
|
170
|
+
# Renumbers RG numbers in a cram compression header.
|
165
171
|
attach_function \
|
166
172
|
:cram_transcode_rg,
|
167
173
|
%i[cram_fd cram_fd cram_container int pointer pointer],
|
168
174
|
:int
|
169
175
|
|
176
|
+
# Copies the blocks representing the next num_slice slices from a
|
177
|
+
# container from 'in' to 'out'.
|
170
178
|
attach_function \
|
171
179
|
:cram_copy_slice,
|
172
180
|
%i[cram_fd cram_fd int32],
|
173
181
|
:int
|
174
182
|
|
183
|
+
# Returns the number of cram blocks within this slice.
|
184
|
+
attach_function \
|
185
|
+
:cram_slice_hdr_get_num_blocks,
|
186
|
+
[:pointer],
|
187
|
+
:int32
|
188
|
+
|
189
|
+
# Returns the block content_id for the block containing an embedded
|
190
|
+
# reference sequence.
|
191
|
+
attach_function \
|
192
|
+
:cram_slice_hdr_get_embed_ref_id,
|
193
|
+
[:pointer],
|
194
|
+
:int
|
195
|
+
|
196
|
+
# Returns slice reference ID, start and span (length) coordinates.
|
197
|
+
attach_function \
|
198
|
+
:cram_slice_hdr_get_coords,
|
199
|
+
%i[pointer pointer pointer pointer],
|
200
|
+
:void
|
201
|
+
|
202
|
+
# Decodes a slice header from a cram block.
|
203
|
+
attach_function \
|
204
|
+
:cram_decode_slice_header,
|
205
|
+
%i[pointer pointer],
|
206
|
+
:pointer
|
207
|
+
|
208
|
+
# Frees a cram_block_slice_hdr structure.
|
209
|
+
attach_function \
|
210
|
+
:cram_free_slice_header,
|
211
|
+
[:pointer],
|
212
|
+
:void
|
213
|
+
|
214
|
+
# Allocates a new cram_block structure with a specified content_type
|
215
|
+
# and id.
|
175
216
|
attach_function \
|
176
217
|
:cram_new_block,
|
177
218
|
[CramContentType, :int],
|
178
219
|
:cram_block
|
179
220
|
|
221
|
+
# Reads a block from a cram file.
|
180
222
|
attach_function \
|
181
223
|
:cram_read_block,
|
182
224
|
[:cram_fd],
|
183
225
|
:cram_block
|
184
226
|
|
227
|
+
# Writes a CRAM block.
|
185
228
|
attach_function \
|
186
229
|
:cram_write_block,
|
187
230
|
%i[cram_fd cram_block],
|
188
231
|
:int
|
189
232
|
|
233
|
+
# Frees a CRAM block, deallocating internal data too.
|
190
234
|
attach_function \
|
191
235
|
:cram_free_block,
|
192
236
|
[:cram_block],
|
193
237
|
:void
|
194
238
|
|
239
|
+
# Uncompresses a CRAM block, if compressed.
|
195
240
|
attach_function \
|
196
241
|
:cram_uncompress_block,
|
197
242
|
[:cram_block],
|
198
243
|
:int
|
199
244
|
|
245
|
+
# Compresses a block.
|
200
246
|
attach_function \
|
201
247
|
:cram_compress_block,
|
202
248
|
%i[cram_fd cram_block cram_metrics int int],
|
@@ -207,6 +253,8 @@ module HTS
|
|
207
253
|
# %i[cram_fd cram_slice cram_block cram_metrics int int],
|
208
254
|
# :int
|
209
255
|
|
256
|
+
# Creates a new container, specifying the maximum number of slices
|
257
|
+
# and records permitted.
|
210
258
|
attach_function \
|
211
259
|
:cram_new_container,
|
212
260
|
%i[int int],
|
@@ -217,16 +265,20 @@ module HTS
|
|
217
265
|
[:cram_container],
|
218
266
|
:void
|
219
267
|
|
268
|
+
# Reads a container header.
|
220
269
|
attach_function \
|
221
270
|
:cram_read_container,
|
222
271
|
[:cram_fd],
|
223
272
|
:cram_container
|
224
273
|
|
274
|
+
# Writes a container structure.
|
225
275
|
attach_function \
|
226
276
|
:cram_write_container,
|
227
277
|
%i[cram_fd cram_container],
|
228
278
|
:int
|
229
279
|
|
280
|
+
# Stores the container structure in dat and returns *size as the
|
281
|
+
# number of bytes written to dat[].
|
230
282
|
attach_function \
|
231
283
|
:cram_store_container,
|
232
284
|
%i[cram_fd cram_container string pointer],
|
@@ -237,61 +289,73 @@ module HTS
|
|
237
289
|
[:cram_container],
|
238
290
|
:int
|
239
291
|
|
292
|
+
# Opens a CRAM file for read (mode "rb") or write ("wb").
|
240
293
|
attach_function \
|
241
294
|
:cram_open,
|
242
295
|
%i[string string],
|
243
296
|
:cram_fd
|
244
297
|
|
298
|
+
# Opens an existing stream for reading or writing.
|
245
299
|
attach_function \
|
246
300
|
:cram_dopen,
|
247
301
|
%i[pointer string string],
|
248
302
|
:cram_fd
|
249
303
|
|
304
|
+
# Closes a CRAM file.
|
250
305
|
attach_function \
|
251
306
|
:cram_close,
|
252
307
|
[:cram_fd],
|
253
308
|
:int
|
254
309
|
|
310
|
+
# Seek within a CRAM file.
|
255
311
|
attach_function \
|
256
312
|
:cram_seek,
|
257
313
|
%i[pointer off_t int],
|
258
314
|
:int # FIXME: pointer should be :cram_fd
|
259
315
|
|
316
|
+
# Flushes a CRAM file.
|
260
317
|
attach_function \
|
261
318
|
:cram_flush,
|
262
319
|
[:cram_fd],
|
263
320
|
:int
|
264
321
|
|
322
|
+
# Checks for end of file on a cram_fd stream.
|
265
323
|
attach_function \
|
266
324
|
:cram_eof,
|
267
325
|
[:cram_fd],
|
268
326
|
:int
|
269
327
|
|
328
|
+
# Sets options on the cram_fd.
|
270
329
|
attach_function \
|
271
330
|
:cram_set_option,
|
272
331
|
[:cram_fd, HtsFmtOption, :varargs],
|
273
332
|
:int
|
274
333
|
|
334
|
+
# Sets options on the cram_fd (variant of cram_set_option taking a va_list).
|
275
335
|
attach_function \
|
276
336
|
:cram_set_voption,
|
277
337
|
[:cram_fd, HtsFmtOption, :pointer], # va_list
|
278
338
|
:int
|
279
339
|
|
340
|
+
# Attaches a header to a cram_fd.
|
280
341
|
attach_function \
|
281
342
|
:cram_set_header,
|
282
343
|
[:cram_fd, SamHdr.by_ref],
|
283
344
|
:int
|
284
345
|
|
346
|
+
# Check if this file has a proper EOF block
|
285
347
|
attach_function \
|
286
348
|
:cram_check_EOF,
|
287
349
|
[:cram_fd],
|
288
350
|
:int
|
289
351
|
|
352
|
+
# As int32_decode/int32_encode, but from/to blocks instead of cram_fd
|
290
353
|
attach_function \
|
291
354
|
:int32_put_blk,
|
292
355
|
%i[cram_block int32_t],
|
293
356
|
:int
|
294
357
|
|
358
|
+
# Returns the refs_t structure used by a cram file handle.
|
295
359
|
attach_function \
|
296
360
|
:cram_get_refs,
|
297
361
|
[HtsFile.by_ref],
|
data/lib/hts/libhts/sam.rb
CHANGED
@@ -685,6 +685,18 @@ module HTS
|
|
685
685
|
:bam_mods_at_qpos,
|
686
686
|
[Bam1, :int, :pointer, :pointer, :int],
|
687
687
|
:int
|
688
|
+
|
689
|
+
# Returns data about a specific modification type for the alignment record.
|
690
|
+
attach_function \
|
691
|
+
:bam_mods_query_type,
|
692
|
+
%i[pointer int pointer pointer string],
|
693
|
+
:int
|
694
|
+
|
695
|
+
# Returns the list of base modification codes provided for this alignment record.
|
696
|
+
attach_function \
|
697
|
+
:bam_mods_recorded,
|
698
|
+
%i[pointer pointer],
|
699
|
+
:int
|
688
700
|
end
|
689
701
|
end
|
690
702
|
|
data/lib/hts/libhts/sam_funcs.rb
CHANGED
@@ -30,15 +30,15 @@ module HTS
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def bam_cigar_opchr(c)
|
33
|
-
|
33
|
+
"#{BAM_CIGAR_STR}??????"[bam_cigar_op(c)]
|
34
34
|
end
|
35
35
|
|
36
36
|
def bam_cigar_gen(l, o)
|
37
|
-
l << BAM_CIGAR_SHIFT | o
|
37
|
+
(l << BAM_CIGAR_SHIFT) | o
|
38
38
|
end
|
39
39
|
|
40
40
|
def bam_cigar_type(o)
|
41
|
-
BAM_CIGAR_TYPE >> (o << 1) & 3
|
41
|
+
(BAM_CIGAR_TYPE >> (o << 1)) & 3
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
@@ -58,43 +58,98 @@ module HTS
|
|
58
58
|
# macros
|
59
59
|
# function-like macros
|
60
60
|
class << self
|
61
|
+
# Get whether the query is on the reverse strand
|
61
62
|
def bam_is_rev(b)
|
62
63
|
b[:core][:flag] & BAM_FREVERSE != 0
|
63
64
|
end
|
64
65
|
|
66
|
+
# Get whether the query's mate is on the reverse strand
|
65
67
|
def bam_is_mrev(b)
|
66
68
|
b[:core][:flag] & BAM_FMREVERSE != 0
|
67
69
|
end
|
68
70
|
|
71
|
+
# Get the name of the query
|
69
72
|
def bam_get_qname(b)
|
70
73
|
b[:data]
|
71
74
|
end
|
72
75
|
|
76
|
+
# Get the CIGAR array
|
73
77
|
def bam_get_cigar(b)
|
74
78
|
b[:data] + b[:core][:l_qname]
|
75
79
|
end
|
76
80
|
|
81
|
+
# Get query sequence
|
77
82
|
def bam_get_seq(b)
|
78
83
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
|
79
84
|
end
|
80
85
|
|
86
|
+
# Get query quality
|
81
87
|
def bam_get_qual(b)
|
82
88
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
|
83
89
|
end
|
84
90
|
|
91
|
+
# Get auxiliary data
|
85
92
|
def bam_get_aux(b)
|
86
93
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
|
87
94
|
end
|
88
95
|
|
96
|
+
# Get length of auxiliary data
|
89
97
|
def bam_get_l_aux(b)
|
90
98
|
b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
|
91
99
|
end
|
92
100
|
|
101
|
+
# Get a base on read
|
93
102
|
def bam_seqi(s, i)
|
94
|
-
s[
|
103
|
+
(s[i >> 1].read_uint8 >> ((~i & 1) << 2)) & 0xf
|
95
104
|
end
|
96
105
|
|
97
|
-
#
|
106
|
+
# Modifies a single base in the bam structure.
|
107
|
+
# Stores base code +b+ at base index +i+ of the nibble-packed sequence +s+.
#
# Two bases share one byte: byte (i >> 1) holds an even index in its high
# nibble and the following odd index in its low nibble (the same layout
# bam_seqi reads with `>> ((~i & 1) << 2) & 0xf`).
#
# s - pointer-like object; s[k] must respond to read_uint8 / write_uint8,
#     exactly as bam_seqi expects.
#     NOTE(review): presumably the pointer returned by bam_get_seq — confirm.
# i - 0-based base index.
# b - base code to store; it is not masked here, so the caller must pass a
#     value that fits in 4 bits.
#
# Returns whatever write_uint8 returns.
def bam_set_seqi(s, i, b)
  shift = (~i & 1) << 2                       # 4 for even i, 0 for odd i
  byte = s[i >> 1]
  # Keep the neighbouring nibble untouched, then merge the new code in.
  preserved = byte.read_uint8 & (0xf0 >> shift)
  byte.write_uint8(preserved | (b << shift))
end
|
110
|
+
|
111
|
+
# Returns the SAM formatted text of the @HD header line
|
112
|
+
def sam_hdr_find_hd(h, ks)
|
113
|
+
sam_hdr_find_line_id(h, "HD", nil, nil, ks)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns the value associated with a given @HD line tag
|
117
|
+
def sam_hdr_find_tag_hd(h, key, ks)
|
118
|
+
sam_hdr_find_tag_id(h, "HD", nil, nil, key, ks)
|
119
|
+
end
|
120
|
+
|
121
|
+
# Adds or updates tags on the header @HD line
|
122
|
+
def sam_hdr_update_hd(h, *args)
|
123
|
+
sam_hdr_update_line(h, "HD", nil, nil, *args, nil)
|
124
|
+
end
|
125
|
+
|
126
|
+
# Removes the @HD line tag with the given key
|
127
|
+
def sam_hdr_remove_tag_hd(h, key)
|
128
|
+
sam_hdr_remove_tag_id(h, "HD", nil, nil, key)
|
129
|
+
end
|
130
|
+
|
131
|
+
BAM_USER_OWNS_STRUCT = 1
|
132
|
+
BAM_USER_OWNS_DATA = 2
|
133
|
+
|
134
|
+
alias bam_itr_destroy hts_itr_destroy
|
135
|
+
alias bam_itr_queryi sam_itr_queryi
|
136
|
+
alias bam_itr_querys sam_itr_querys
|
137
|
+
alias bam_itr_next sam_itr_next
|
138
|
+
|
139
|
+
# Load/build .csi or .bai BAM index file. Does not work with CRAM.
|
140
|
+
# It is recommended to use the sam_index_* functions below instead.
|
141
|
+
def bam_index_load(fn)
|
142
|
+
hts_idx_load(fn, HTS_FMT_BAI)
|
143
|
+
end
|
144
|
+
|
145
|
+
alias bam_index_build sam_index_build
|
146
|
+
|
147
|
+
alias sam_itr_destroy hts_itr_destroy
|
148
|
+
|
149
|
+
alias sam_open hts_open
|
150
|
+
alias sam_open_format hts_open_format
|
151
|
+
alias sam_flush hts_flush
|
152
|
+
alias sam_close hts_close
|
98
153
|
end
|
99
154
|
end
|
100
155
|
end
|
data/lib/hts/libhts/tbx_funcs.rb
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
module HTS
  module LibHTS
    # Tabix iterator helpers. Each method forwards to one of the generic
    # hts_itr_* bindings, filling in the tabix-specific arguments.
    #
    # @@tbx_readrec, @@tbx_name2id and @@hts_itr_query are passed through as
    # opaque callback arguments.
    # NOTE(review): presumably these are the function objects stored by
    # attach_function for the bound C functions — confirm.
    class << self
      # Destroys +iter+ by delegating to hts_itr_destroy.
      def tbx_itr_destroy(iter)
        hts_itr_destroy(iter)
      end

      # Creates an iterator on tbx[:idx] from a numeric target id and the
      # coordinates +beg+ / +end_+, via hts_itr_query.
      def tbx_itr_queryi(tbx, tid, beg, end_)
        # The record reader is handed over as a callback object; a bare
        # `tbx_readrec` would invoke the binding with no arguments instead.
        hts_itr_query(tbx[:idx], tid, beg, end_, @@tbx_readrec)
      end

      # Creates an iterator on tbx[:idx] from the region string +s+, via
      # hts_itr_querys. +tbx+ itself is passed along as the fourth argument.
      def tbx_itr_querys(tbx, s)
        hts_itr_querys(tbx[:idx], s, @@tbx_name2id, tbx, @@hts_itr_query, @@tbx_readrec)
      end

      # Advances +itr+ into +r+, using the handle that hts_get_bgzfp returns
      # for +htsfp+.
      def tbx_itr_next(htsfp, tbx, itr, r)
        hts_itr_next(hts_get_bgzfp(htsfp), itr, r, tbx)
      end

      # Same as tbx_itr_next, but takes the BGZF handle +bgzfp+ directly.
      def tbx_bgzf_itr_next(bgzfp, tbx, itr, r)
        hts_itr_next(bgzfp, itr, r, tbx)
      end
    end
  end
end
|