htslib 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -6
- data/lib/hts/bam/{aux.rb → auxi.rb} +6 -0
- data/lib/hts/bam/record.rb +1 -1
- data/lib/hts/bam.rb +46 -14
- data/lib/hts/bcf/header_record.rb +11 -0
- data/lib/hts/bcf/record.rb +1 -1
- data/lib/hts/bcf.rb +84 -11
- data/lib/hts/faidx.rb +40 -9
- data/lib/hts/hts.rb +11 -6
- data/lib/hts/libhts/constants.rb +20 -19
- data/lib/hts/libhts/cram.rb +64 -0
- data/lib/hts/libhts/sam.rb +12 -0
- data/lib/hts/libhts/sam_funcs.rb +60 -5
- data/lib/hts/libhts/tbx.rb +1 -1
- data/lib/hts/libhts/tbx_funcs.rb +25 -0
- data/lib/hts/libhts/vcf.rb +114 -5
- data/lib/hts/libhts/vcf_funcs.rb +81 -3
- data/lib/hts/libhts.rb +2 -1
- data/lib/hts/tbx.rb +37 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +15 -1
- metadata +7 -4
data/lib/hts/libhts/cram.rb
CHANGED
@@ -7,6 +7,8 @@ module HTS
|
|
7
7
|
typedef :pointer, :cram_block
|
8
8
|
typedef :pointer, :cram_metrics
|
9
9
|
|
10
|
+
# cram_fd
|
11
|
+
|
10
12
|
attach_function \
|
11
13
|
:cram_fd_get_header,
|
12
14
|
[:cram_fd],
|
@@ -82,6 +84,8 @@ module HTS
|
|
82
84
|
[:cram_fd],
|
83
85
|
:int
|
84
86
|
|
87
|
+
# cram_block
|
88
|
+
|
85
89
|
attach_function \
|
86
90
|
:cram_block_get_content_id,
|
87
91
|
[:cram_block],
|
@@ -157,46 +161,88 @@ module HTS
|
|
157
161
|
%i[cram_block size_t],
|
158
162
|
:void
|
159
163
|
|
164
|
+
# Computes the size of a cram block, including the block header itself.
|
160
165
|
attach_function \
|
161
166
|
:cram_block_size,
|
162
167
|
[:cram_block],
|
163
168
|
:uint32
|
164
169
|
|
170
|
+
# Renumbers RG numbers in a cram compression header.
|
165
171
|
attach_function \
|
166
172
|
:cram_transcode_rg,
|
167
173
|
%i[cram_fd cram_fd cram_container int pointer pointer],
|
168
174
|
:int
|
169
175
|
|
176
|
+
# Copies the blocks representing the next num_slice slices from a
|
177
|
+
# container from 'in' to 'out'.
|
170
178
|
attach_function \
|
171
179
|
:cram_copy_slice,
|
172
180
|
%i[cram_fd cram_fd int32],
|
173
181
|
:int
|
174
182
|
|
183
|
+
# Returns the number of cram blocks within this slice.
|
184
|
+
attach_function \
|
185
|
+
:cram_slice_hdr_get_num_blocks,
|
186
|
+
[:pointer],
|
187
|
+
:int32
|
188
|
+
|
189
|
+
# Returns the block content_id for the block containing an embedded
|
190
|
+
# reference sequence.
|
191
|
+
attach_function \
|
192
|
+
:cram_slice_hdr_get_embed_ref_id,
|
193
|
+
[:pointer],
|
194
|
+
:int
|
195
|
+
|
196
|
+
# Returns slice reference ID, start and span (length) coordinates.
|
197
|
+
attach_function \
|
198
|
+
:cram_slice_hdr_get_coords,
|
199
|
+
%i[pointer pointer pointer pointer],
|
200
|
+
:void
|
201
|
+
|
202
|
+
# Decodes a slice header from a cram block.
|
203
|
+
attach_function \
|
204
|
+
:cram_decode_slice_header,
|
205
|
+
%i[pointer pointer],
|
206
|
+
:pointer
|
207
|
+
|
208
|
+
# Frees a cram_block_slice_hdr structure.
|
209
|
+
attach_function \
|
210
|
+
:cram_free_slice_header,
|
211
|
+
[:pointer],
|
212
|
+
:void
|
213
|
+
|
214
|
+
# Allocates a new cram_block structure with a specified content_type
|
215
|
+
# and id.
|
175
216
|
attach_function \
|
176
217
|
:cram_new_block,
|
177
218
|
[CramContentType, :int],
|
178
219
|
:cram_block
|
179
220
|
|
221
|
+
# Reads a block from a cram file.
|
180
222
|
attach_function \
|
181
223
|
:cram_read_block,
|
182
224
|
[:cram_fd],
|
183
225
|
:cram_block
|
184
226
|
|
227
|
+
# Writes a CRAM block.
|
185
228
|
attach_function \
|
186
229
|
:cram_write_block,
|
187
230
|
%i[cram_fd cram_block],
|
188
231
|
:int
|
189
232
|
|
233
|
+
# Frees a CRAM block, deallocating internal data too.
|
190
234
|
attach_function \
|
191
235
|
:cram_free_block,
|
192
236
|
[:cram_block],
|
193
237
|
:void
|
194
238
|
|
239
|
+
# Uncompresses a CRAM block, if compressed.
|
195
240
|
attach_function \
|
196
241
|
:cram_uncompress_block,
|
197
242
|
[:cram_block],
|
198
243
|
:int
|
199
244
|
|
245
|
+
# Compresses a block.
|
200
246
|
attach_function \
|
201
247
|
:cram_compress_block,
|
202
248
|
%i[cram_fd cram_block cram_metrics int int],
|
@@ -207,6 +253,8 @@ module HTS
|
|
207
253
|
# %i[cram_fd cram_slice cram_block cram_metrics int int],
|
208
254
|
# :int
|
209
255
|
|
256
|
+
# Creates a new container, specifying the maximum number of slices
|
257
|
+
# and records permitted.
|
210
258
|
attach_function \
|
211
259
|
:cram_new_container,
|
212
260
|
%i[int int],
|
@@ -217,16 +265,20 @@ module HTS
|
|
217
265
|
[:cram_container],
|
218
266
|
:void
|
219
267
|
|
268
|
+
# Reads a container header.
|
220
269
|
attach_function \
|
221
270
|
:cram_read_container,
|
222
271
|
[:cram_fd],
|
223
272
|
:cram_container
|
224
273
|
|
274
|
+
# Writes a container structure.
|
225
275
|
attach_function \
|
226
276
|
:cram_write_container,
|
227
277
|
%i[cram_fd cram_container],
|
228
278
|
:int
|
229
279
|
|
280
|
+
# Stores the container structure in dat and returns *size as the
|
281
|
+
# number of bytes written to dat[].
|
230
282
|
attach_function \
|
231
283
|
:cram_store_container,
|
232
284
|
%i[cram_fd cram_container string pointer],
|
@@ -237,61 +289,73 @@ module HTS
|
|
237
289
|
[:cram_container],
|
238
290
|
:int
|
239
291
|
|
292
|
+
# Opens a CRAM file for read (mode "rb") or write ("wb").
|
240
293
|
attach_function \
|
241
294
|
:cram_open,
|
242
295
|
%i[string string],
|
243
296
|
:cram_fd
|
244
297
|
|
298
|
+
# Opens an existing stream for reading or writing.
|
245
299
|
attach_function \
|
246
300
|
:cram_dopen,
|
247
301
|
%i[pointer string string],
|
248
302
|
:cram_fd
|
249
303
|
|
304
|
+
# Closes a CRAM file.
|
250
305
|
attach_function \
|
251
306
|
:cram_close,
|
252
307
|
[:cram_fd],
|
253
308
|
:int
|
254
309
|
|
310
|
+
# Seek within a CRAM file.
|
255
311
|
attach_function \
|
256
312
|
:cram_seek,
|
257
313
|
%i[pointer off_t int],
|
258
314
|
:int # FIXME: pointer should be :cram_fd
|
259
315
|
|
316
|
+
# Flushes a CRAM file.
|
260
317
|
attach_function \
|
261
318
|
:cram_flush,
|
262
319
|
[:cram_fd],
|
263
320
|
:int
|
264
321
|
|
322
|
+
# Checks for end of file on a cram_fd stream.
|
265
323
|
attach_function \
|
266
324
|
:cram_eof,
|
267
325
|
[:cram_fd],
|
268
326
|
:int
|
269
327
|
|
328
|
+
# Sets options on the cram_fd.
|
270
329
|
attach_function \
|
271
330
|
:cram_set_option,
|
272
331
|
[:cram_fd, HtsFmtOption, :varargs],
|
273
332
|
:int
|
274
333
|
|
334
|
+
# Sets options on the cram_fd (va_list variant of cram_set_option).
|
275
335
|
attach_function \
|
276
336
|
:cram_set_voption,
|
277
337
|
[:cram_fd, HtsFmtOption, :pointer], # va_list
|
278
338
|
:int
|
279
339
|
|
340
|
+
# Attaches a header to a cram_fd.
|
280
341
|
attach_function \
|
281
342
|
:cram_set_header,
|
282
343
|
[:cram_fd, SamHdr.by_ref],
|
283
344
|
:int
|
284
345
|
|
346
|
+
# Check if this file has a proper EOF block
|
285
347
|
attach_function \
|
286
348
|
:cram_check_EOF,
|
287
349
|
[:cram_fd],
|
288
350
|
:int
|
289
351
|
|
352
|
+
# As int32_decode/int32_encode, but from/to blocks instead of cram_fd
|
290
353
|
attach_function \
|
291
354
|
:int32_put_blk,
|
292
355
|
%i[cram_block int32_t],
|
293
356
|
:int
|
294
357
|
|
358
|
+
# Returns the refs_t structure used by a cram file handle.
|
295
359
|
attach_function \
|
296
360
|
:cram_get_refs,
|
297
361
|
[HtsFile.by_ref],
|
data/lib/hts/libhts/sam.rb
CHANGED
@@ -685,6 +685,18 @@ module HTS
|
|
685
685
|
:bam_mods_at_qpos,
|
686
686
|
[Bam1, :int, :pointer, :pointer, :int],
|
687
687
|
:int
|
688
|
+
|
689
|
+
# Returns data about a specific modification type for the alignment record.
|
690
|
+
attach_function \
|
691
|
+
:bam_mods_query_type,
|
692
|
+
%i[pointer int pointer pointer string],
|
693
|
+
:int
|
694
|
+
|
695
|
+
# Returns the list of base modification codes provided for this alignment record.
|
696
|
+
attach_function \
|
697
|
+
:bam_mods_recorded,
|
698
|
+
%i[pointer pointer],
|
699
|
+
:int
|
688
700
|
end
|
689
701
|
end
|
690
702
|
|
data/lib/hts/libhts/sam_funcs.rb
CHANGED
@@ -30,15 +30,15 @@ module HTS
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def bam_cigar_opchr(c)
|
33
|
-
|
33
|
+
"#{BAM_CIGAR_STR}??????"[bam_cigar_op(c)]
|
34
34
|
end
|
35
35
|
|
36
36
|
def bam_cigar_gen(l, o)
|
37
|
-
l << BAM_CIGAR_SHIFT | o
|
37
|
+
(l << BAM_CIGAR_SHIFT) | o
|
38
38
|
end
|
39
39
|
|
40
40
|
def bam_cigar_type(o)
|
41
|
-
BAM_CIGAR_TYPE >> (o << 1) & 3
|
41
|
+
(BAM_CIGAR_TYPE >> (o << 1)) & 3
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
@@ -58,43 +58,98 @@ module HTS
|
|
58
58
|
# macros
|
59
59
|
# function-like macros
|
60
60
|
class << self
|
61
|
+
# Get whether the query is on the reverse strand
|
61
62
|
def bam_is_rev(b)
|
62
63
|
b[:core][:flag] & BAM_FREVERSE != 0
|
63
64
|
end
|
64
65
|
|
66
|
+
# Get whether the query's mate is on the reverse strand
|
65
67
|
def bam_is_mrev(b)
|
66
68
|
b[:core][:flag] & BAM_FMREVERSE != 0
|
67
69
|
end
|
68
70
|
|
71
|
+
# Get the name of the query
|
69
72
|
def bam_get_qname(b)
|
70
73
|
b[:data]
|
71
74
|
end
|
72
75
|
|
76
|
+
# Get the CIGAR array
|
73
77
|
def bam_get_cigar(b)
|
74
78
|
b[:data] + b[:core][:l_qname]
|
75
79
|
end
|
76
80
|
|
81
|
+
# Get query sequence
|
77
82
|
def bam_get_seq(b)
|
78
83
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
|
79
84
|
end
|
80
85
|
|
86
|
+
# Get query quality
|
81
87
|
def bam_get_qual(b)
|
82
88
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
|
83
89
|
end
|
84
90
|
|
91
|
+
# Get auxiliary data
|
85
92
|
def bam_get_aux(b)
|
86
93
|
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
|
87
94
|
end
|
88
95
|
|
96
|
+
# Get length of auxiliary data
|
89
97
|
def bam_get_l_aux(b)
|
90
98
|
b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
|
91
99
|
end
|
92
100
|
|
101
|
+
# Get a base on read
|
93
102
|
def bam_seqi(s, i)
|
94
|
-
s[
|
103
|
+
(s[i >> 1].read_uint8 >> ((~i & 1) << 2)) & 0xf
|
95
104
|
end
|
96
105
|
|
97
|
-
#
|
106
|
+
# Modifies a single base in the bam structure.
|
107
|
+
def bam_set_seqi(s, i, b)
  # Two 4-bit base codes are packed per byte: an even +i+ selects the high
  # nibble (shift 4), an odd +i+ selects the low nibble (shift 0).
  byte_index = i >> 1
  shift = (~i & 1) << 2
  keep_mask = 0xf0 >> shift # preserves the neighbouring nibble

  if s.respond_to?(:put_uint8)
    # Pointer-like buffer (the form bam_seqi reads with read_uint8):
    # indexing yields another pointer rather than an Integer, so the byte
    # has to be read and written explicitly.
    packed = (s.get_uint8(byte_index) & keep_mask) | (b << shift)
    s.put_uint8(byte_index, packed)
  else
    # Integer-indexable buffer such as an Array of bytes.
    packed = (s[byte_index] & keep_mask) | (b << shift)
    s[byte_index] = packed
  end

  # Return the updated byte, as the original assignment expression did.
  packed
end
|
110
|
+
|
111
|
+
# Returns the SAM formatted text of the \@HD header line
|
112
|
+
def sam_hdr_find_hd(h, ks)
|
113
|
+
sam_hdr_find_line_id(h, "HD", nil, nil, ks)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns the value associated with a given \@HD line tag
|
117
|
+
def sam_hdr_find_tag_hd(h, key, ks)
|
118
|
+
sam_hdr_find_tag_id(h, "HD", nil, nil, key, ks)
|
119
|
+
end
|
120
|
+
|
121
|
+
# Adds or updates tags on the header \@HD line
|
122
|
+
def sam_hdr_update_hd(h, *args)
|
123
|
+
sam_hdr_update_line(h, "HD", nil, nil, *args, nil)
|
124
|
+
end
|
125
|
+
|
126
|
+
# Removes the \@HD line tag with the given key
|
127
|
+
def sam_hdr_remove_tag_hd(h, key)
|
128
|
+
sam_hdr_remove_tag_id(h, "HD", nil, nil, key)
|
129
|
+
end
|
130
|
+
|
131
|
+
BAM_USER_OWNS_STRUCT = 1
|
132
|
+
BAM_USER_OWNS_DATA = 2
|
133
|
+
|
134
|
+
alias bam_itr_destroy hts_itr_destroy
|
135
|
+
alias bam_itr_queryi sam_itr_queryi
|
136
|
+
alias bam_itr_querys sam_itr_querys
|
137
|
+
alias bam_itr_next sam_itr_next
|
138
|
+
|
139
|
+
# Load/build .csi or .bai BAM index file. Does not work with CRAM.
|
140
|
+
# It is recommended to use the sam_index_* functions below instead.
|
141
|
+
def bam_index_load(fn)
|
142
|
+
hts_idx_load(fn, HTS_FMT_BAI)
|
143
|
+
end
|
144
|
+
|
145
|
+
alias bam_index_build sam_index_build
|
146
|
+
|
147
|
+
alias sam_itr_destroy hts_itr_destroy
|
148
|
+
|
149
|
+
alias sam_open hts_open
|
150
|
+
alias sam_open_format hts_open_format
|
151
|
+
alias sam_flush hts_flush
|
152
|
+
alias sam_close hts_close
|
98
153
|
end
|
99
154
|
end
|
100
155
|
end
|
data/lib/hts/libhts/tbx_funcs.rb
CHANGED
@@ -0,0 +1,25 @@
|
|
1
|
+
module HTS
  module LibHTS
    # Ruby counterparts of the tabix iterator helpers (tbx_itr_*).
    # Every helper simply forwards to the generic hts_itr_* bindings.
    class << self
      # Releases a tabix iterator by delegating to hts_itr_destroy.
      def tbx_itr_destroy(iter)
        hts_itr_destroy(iter)
      end

      # Builds an iterator over +tid+ / +beg+ / +end_+ using the index stored
      # in tbx[:idx].
      #
      # The record reader is handed over as the @@tbx_readrec function object,
      # the same form tbx_itr_querys uses; a bare +tbx_readrec+ would invoke
      # the binding with no arguments instead of passing it along.
      def tbx_itr_queryi(tbx, tid, beg, end_)
        hts_itr_query(tbx[:idx], tid, beg, end_, @@tbx_readrec)
      end

      # Builds an iterator from the region string +s+, passing the name-to-id,
      # query and record-reader function objects to hts_itr_querys.
      def tbx_itr_querys(tbx, s)
        hts_itr_querys(tbx[:idx], s, @@tbx_name2id, tbx, @@hts_itr_query, @@tbx_readrec)
      end

      # Advances +itr+ on the BGZF handle obtained from +htsfp+ via
      # hts_get_bgzfp; +r+ and +tbx+ are forwarded unchanged.
      def tbx_itr_next(htsfp, tbx, itr, r)
        hts_itr_next(hts_get_bgzfp(htsfp), itr, r, tbx)
      end

      # Same as tbx_itr_next, but takes the BGZF handle directly.
      def tbx_bgzf_itr_next(bgzfp, tbx, itr, r)
        hts_itr_next(bgzfp, itr, r, tbx)
      end
    end
  end
end
|