htslib 0.0.2 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -22
- data/lib/hts/bam/cigar.rb +11 -6
- data/lib/hts/bam/flag.rb +97 -0
- data/lib/hts/bam/header.rb +17 -7
- data/lib/hts/bam/record.rb +199 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +53 -0
- data/lib/hts/bcf/header.rb +26 -0
- data/lib/hts/bcf/info.rb +94 -0
- data/lib/hts/bcf/record.rb +113 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +126 -47
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +13 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/libhts/sam.rb +102 -0
- data/lib/hts/{ffi/sam.rb → libhts/sam_funcs.rb} +24 -120
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/libhts/vcf.rb +226 -0
- data/lib/hts/{ffi/vcf.rb → libhts/vcf_funcs.rb} +1 -70
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +48 -27
- data/lib/hts/bam/alignment.rb +0 -155
- data/lib/hts/fai.rb +0 -57
- data/lib/hts/ffi.rb +0 -85
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf/header.rb +0 -24
- data/lib/hts/vcf/variant.rb +0 -43
- data/lib/hts/vcf.rb +0 -42
@@ -1,34 +1,34 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
typedef :pointer, :HFILE
|
6
6
|
typedef :int64, :hts_pos_t
|
7
7
|
typedef :pointer, :bam_plp_auto_f
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class KString <
|
11
|
+
class KString < FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
-
class KSeq <
|
18
|
+
class KSeq < FFI::Struct
|
19
19
|
layout \
|
20
20
|
:name, KString,
|
21
21
|
:comment, KString,
|
22
22
|
:seq, KString,
|
23
23
|
:qual, KString,
|
24
24
|
:last_char, :int,
|
25
|
-
:f, :pointer #
|
25
|
+
:f, :pointer # kstream_t
|
26
26
|
end
|
27
27
|
|
28
28
|
# BGZF
|
29
|
-
class BGZF <
|
29
|
+
class BGZF < FFI::BitStruct
|
30
30
|
layout \
|
31
|
-
:
|
31
|
+
:_flags, :uint, # bit_fields
|
32
32
|
:cache_size, :int,
|
33
33
|
:block_length, :int,
|
34
34
|
:block_clength, :int,
|
@@ -44,9 +44,29 @@ module HTS
|
|
44
44
|
:idx_build_otf, :int,
|
45
45
|
:gz_stream, :pointer,
|
46
46
|
:seeked, :int64
|
47
|
+
|
48
|
+
bit_fields :_flags,
|
49
|
+
:errcode, 16,
|
50
|
+
:_reserved, 1,
|
51
|
+
:is_write, 1,
|
52
|
+
:no_eof_block, 1,
|
53
|
+
:is_be, 1,
|
54
|
+
:compress_level, 9,
|
55
|
+
:last_block_eof, 1,
|
56
|
+
:is_compressed, 1,
|
57
|
+
:is_gzip, 1
|
47
58
|
end
|
48
59
|
|
49
60
|
# hts
|
61
|
+
HtsLogLevel = enum(
|
62
|
+
:off, # All logging disabled.
|
63
|
+
:error, # Logging of errors only.
|
64
|
+
:warning, 3, # Logging of errors and warnings.
|
65
|
+
:info, # Logging of errors, warnings, and normal but significant events.
|
66
|
+
:debug, # Logging of all except the most detailed debug events.
|
67
|
+
:trace # All logging enabled.
|
68
|
+
)
|
69
|
+
|
50
70
|
HtsFormatCategory = enum(
|
51
71
|
:unknown_category,
|
52
72
|
:sequence_data, # Sequence data -- SAM, BAM, CRAM, etc
|
@@ -105,7 +125,7 @@ module HTS
|
|
105
125
|
:HTS_OPT_BLOCK_SIZE
|
106
126
|
)
|
107
127
|
|
108
|
-
class HtsFormat <
|
128
|
+
class HtsFormat < FFI::Struct
|
109
129
|
layout \
|
110
130
|
:category, HtsFormatCategory,
|
111
131
|
:format, HtsExactFormat,
|
@@ -119,7 +139,7 @@ module HTS
|
|
119
139
|
:specific, :pointer
|
120
140
|
end
|
121
141
|
|
122
|
-
class HtsIdx <
|
142
|
+
class HtsIdx < FFI::Struct
|
123
143
|
layout \
|
124
144
|
:fmt, :int,
|
125
145
|
:min_shift, :int,
|
@@ -152,7 +172,7 @@ module HTS
|
|
152
172
|
end
|
153
173
|
|
154
174
|
# HtsFile
|
155
|
-
class SamHdr <
|
175
|
+
class SamHdr < FFI::Struct
|
156
176
|
layout \
|
157
177
|
:n_targets, :int32,
|
158
178
|
:ignore_sam_err, :int32,
|
@@ -168,9 +188,9 @@ module HTS
|
|
168
188
|
|
169
189
|
BamHdr = SamHdr
|
170
190
|
|
171
|
-
class HtsFile <
|
191
|
+
class HtsFile < FFI::BitStruct
|
172
192
|
layout \
|
173
|
-
:
|
193
|
+
:_flags, :uint32, # bit_fields
|
174
194
|
:lineno, :int64,
|
175
195
|
:line, KString,
|
176
196
|
:fn, :string,
|
@@ -186,17 +206,25 @@ module HTS
|
|
186
206
|
:idx, HtsIdx.ptr,
|
187
207
|
:fnidx, :string,
|
188
208
|
:bam_header, SamHdr.ptr
|
209
|
+
|
210
|
+
bit_fields :_flags,
|
211
|
+
:is_bin, 1,
|
212
|
+
:is_write, 1,
|
213
|
+
:is_be, 1,
|
214
|
+
:is_cram, 1,
|
215
|
+
:is_bgzf, 1,
|
216
|
+
:dummy, 27
|
189
217
|
end
|
190
218
|
|
191
219
|
SamFile = HtsFile
|
192
220
|
|
193
|
-
class HtsThreadPool <
|
221
|
+
class HtsThreadPool < FFI::Struct
|
194
222
|
layout \
|
195
223
|
:pool, :pointer,
|
196
224
|
:qsize, :int
|
197
225
|
end
|
198
226
|
|
199
|
-
class HtsOpt <
|
227
|
+
class HtsOpt < FFI::Struct
|
200
228
|
layout \
|
201
229
|
:arg, :string,
|
202
230
|
:opt, HtsFmtOption,
|
@@ -208,9 +236,9 @@ module HTS
|
|
208
236
|
:next, HtsOpt.ptr
|
209
237
|
end
|
210
238
|
|
211
|
-
class HtsItr <
|
239
|
+
class HtsItr < FFI::BitStruct
|
212
240
|
layout \
|
213
|
-
:
|
241
|
+
:_flags, :uint32, # bit_fields
|
214
242
|
:tid, :int,
|
215
243
|
:n_off, :int,
|
216
244
|
:i, :int,
|
@@ -235,9 +263,17 @@ module HTS
|
|
235
263
|
:m, :int,
|
236
264
|
:a, :pointer
|
237
265
|
)
|
266
|
+
|
267
|
+
bit_fields :_flags,
|
268
|
+
:read_rest, 1,
|
269
|
+
:finished, 1,
|
270
|
+
:is_cram, 1,
|
271
|
+
:nocoor, 1,
|
272
|
+
:multi, 1,
|
273
|
+
:dummy, 27
|
238
274
|
end
|
239
275
|
|
240
|
-
class Bam1Core <
|
276
|
+
class Bam1Core < FFI::Struct
|
241
277
|
layout \
|
242
278
|
:pos, :hts_pos_t,
|
243
279
|
:tid, :int32,
|
@@ -253,23 +289,54 @@ module HTS
|
|
253
289
|
:isize, :hts_pos_t
|
254
290
|
end
|
255
291
|
|
256
|
-
class Bam1 <
|
292
|
+
class Bam1 < FFI::ManagedStruct
|
257
293
|
layout \
|
258
294
|
:core, Bam1Core,
|
259
295
|
:id, :uint64,
|
260
296
|
:data, :pointer, # uint8_t
|
261
297
|
:l_data, :int,
|
262
298
|
:m_data, :uint32,
|
263
|
-
:
|
299
|
+
:_mempolicy, :uint32 # bit_fields
|
300
|
+
|
301
|
+
# bit_fields :_mempolicy,
|
302
|
+
# :mempolicy, 2,
|
303
|
+
# :_reserved, 30
|
304
|
+
|
305
|
+
def self.release(ptr)
|
306
|
+
LibHTS.bam_destroy1(ptr) unless ptr.null?
|
307
|
+
end
|
264
308
|
end
|
265
309
|
|
266
|
-
|
310
|
+
typedef :pointer, :bam_plp
|
311
|
+
typedef :pointer, :bam_mplp
|
312
|
+
|
313
|
+
class BamPileupCd < FFI::Union
|
314
|
+
layout \
|
315
|
+
:p, :pointer,
|
316
|
+
:i, :int64_t,
|
317
|
+
:f, :double
|
267
318
|
end
|
268
319
|
|
269
|
-
class
|
320
|
+
class BamPileup1 < FFI::BitStruct
|
321
|
+
layout \
|
322
|
+
:b, Bam1.ptr,
|
323
|
+
:qpos, :int32_t,
|
324
|
+
:indel, :int,
|
325
|
+
:level, :int,
|
326
|
+
:_flags, :uint32_t, # bit_fields
|
327
|
+
:cd, BamPileupCd,
|
328
|
+
:cigar_ind, :int
|
329
|
+
|
330
|
+
bit_fields :_flags,
|
331
|
+
:is_del, 1,
|
332
|
+
:is_head, 1,
|
333
|
+
:is_tail, 1,
|
334
|
+
:is_refskip, 1,
|
335
|
+
:_reserved, 1,
|
336
|
+
:aux, 27
|
270
337
|
end
|
271
338
|
|
272
|
-
class TbxConf <
|
339
|
+
class TbxConf < FFI::Struct
|
273
340
|
layout \
|
274
341
|
:preset, :int32,
|
275
342
|
:sc, :int32,
|
@@ -279,7 +346,7 @@ module HTS
|
|
279
346
|
:line_skip, :int32
|
280
347
|
end
|
281
348
|
|
282
|
-
class Tbx <
|
349
|
+
class Tbx < FFI::Struct
|
283
350
|
layout \
|
284
351
|
:conf, TbxConf.ptr,
|
285
352
|
:idx, HtsIdx.ptr,
|
@@ -290,7 +357,7 @@ module HTS
|
|
290
357
|
|
291
358
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
292
359
|
|
293
|
-
class Faidx <
|
360
|
+
class Faidx < FFI::Struct
|
294
361
|
layout :bgzf, BGZF,
|
295
362
|
:n, :int,
|
296
363
|
:m, :int,
|
@@ -299,15 +366,15 @@ module HTS
|
|
299
366
|
:format, FaiFormatOptions
|
300
367
|
end
|
301
368
|
|
302
|
-
#
|
369
|
+
# bcf
|
303
370
|
|
304
|
-
class
|
371
|
+
class BcfVariant < FFI::Struct
|
305
372
|
layout \
|
306
373
|
:type, :int,
|
307
374
|
:n, :int
|
308
375
|
end
|
309
376
|
|
310
|
-
class BcfHrec <
|
377
|
+
class BcfHrec < FFI::Struct
|
311
378
|
layout \
|
312
379
|
:type, :int,
|
313
380
|
:key, :string,
|
@@ -317,7 +384,7 @@ module HTS
|
|
317
384
|
:vals, :pointer
|
318
385
|
end
|
319
386
|
|
320
|
-
class BcfFmt <
|
387
|
+
class BcfFmt < FFI::BitStruct
|
321
388
|
layout \
|
322
389
|
:id, :int,
|
323
390
|
:n, :int,
|
@@ -325,10 +392,14 @@ module HTS
|
|
325
392
|
:type, :int,
|
326
393
|
:p, :pointer, # uint8_t
|
327
394
|
:p_len, :uint32,
|
328
|
-
:
|
395
|
+
:_p_off_free, :uint32 # bit_fields
|
396
|
+
|
397
|
+
bit_fields :_p_off_free,
|
398
|
+
:p_off, 31,
|
399
|
+
:p_free, 1
|
329
400
|
end
|
330
401
|
|
331
|
-
class BcfInfo <
|
402
|
+
class BcfInfo < FFI::BitStruct
|
332
403
|
layout \
|
333
404
|
:key, :int,
|
334
405
|
:type, :int,
|
@@ -339,27 +410,31 @@ module HTS
|
|
339
410
|
),
|
340
411
|
:vptr, :pointer,
|
341
412
|
:vptr_len, :uint32,
|
342
|
-
:
|
413
|
+
:_vptr_off_free, :uint32, # bit_fields
|
343
414
|
:len, :int
|
415
|
+
|
416
|
+
bit_fields :_vptr_off_free,
|
417
|
+
:vptr_off, 31,
|
418
|
+
:vptr_free, 1
|
344
419
|
end
|
345
420
|
|
346
|
-
class BcfIdinfo <
|
421
|
+
class BcfIdinfo < FFI::Struct
|
347
422
|
layout \
|
348
|
-
:info, [:
|
423
|
+
:info, [:uint64_t, 3],
|
349
424
|
:hrec, [BcfHrec.ptr, 3],
|
350
425
|
:id, :int
|
351
426
|
end
|
352
427
|
|
353
|
-
class BcfIdpair <
|
428
|
+
class BcfIdpair < FFI::Struct
|
354
429
|
layout \
|
355
430
|
:key, :string,
|
356
431
|
:val, BcfIdinfo.ptr
|
357
432
|
end
|
358
433
|
|
359
|
-
class BcfHdr <
|
434
|
+
class BcfHdr < FFI::Struct
|
360
435
|
layout \
|
361
436
|
:n, [:int, 3],
|
362
|
-
:id, [
|
437
|
+
:id, [:pointer, 3], # BcfIdpair.ptr
|
363
438
|
:dict, [:pointer, 3],
|
364
439
|
:samples, :pointer,
|
365
440
|
:hrec, :pointer,
|
@@ -373,7 +448,7 @@ module HTS
|
|
373
448
|
:m, [:int, 3]
|
374
449
|
end
|
375
450
|
|
376
|
-
class BcfDec <
|
451
|
+
class BcfDec < FFI::Struct
|
377
452
|
layout \
|
378
453
|
:m_fmt, :int,
|
379
454
|
:m_info, :int,
|
@@ -386,23 +461,23 @@ module HTS
|
|
386
461
|
:id, :string,
|
387
462
|
:als, :pointer, # (\\0-separated string)
|
388
463
|
:allele, :pointer,
|
389
|
-
:info, BcfInfo.ptr,
|
464
|
+
:info, :pointer, # BcfInfo.ptr,
|
390
465
|
:fmt, BcfFmt.ptr,
|
391
|
-
:var,
|
466
|
+
:var, BcfVariant.ptr,
|
392
467
|
:n_var, :int,
|
393
468
|
:var_type, :int,
|
394
469
|
:shared_dirty, :int,
|
395
470
|
:indiv_dirty, :int
|
396
471
|
end
|
397
472
|
|
398
|
-
class Bcf1 <
|
473
|
+
class Bcf1 < FFI::ManagedBitStruct
|
399
474
|
layout \
|
400
475
|
:pos, :hts_pos_t,
|
401
476
|
:rlen, :hts_pos_t,
|
402
477
|
:rid, :int32_t,
|
403
478
|
:qual, :float,
|
404
|
-
:
|
405
|
-
:
|
479
|
+
:_n_info_allele, :uint32_t,
|
480
|
+
:_n_fmt_sample, :uint32_t,
|
406
481
|
:shared, KString,
|
407
482
|
:indiv, KString,
|
408
483
|
:d, BcfDec,
|
@@ -411,13 +486,17 @@ module HTS
|
|
411
486
|
:unpack_size, [:int, 3],
|
412
487
|
:errcode, :int
|
413
488
|
|
414
|
-
|
415
|
-
|
416
|
-
|
489
|
+
bit_fields :_n_info_allele,
|
490
|
+
:n_info, 16,
|
491
|
+
:n_allele, 16
|
492
|
+
|
493
|
+
bit_fields :_n_fmt_sample,
|
494
|
+
:n_fmt, 8,
|
495
|
+
:n_sample, 24
|
417
496
|
|
418
|
-
|
419
|
-
|
420
|
-
|
497
|
+
def self.release(ptr)
|
498
|
+
LibHTS.bcf_destroy(ptr) unless ptr.null?
|
499
|
+
end
|
421
500
|
end
|
422
501
|
end
|
423
502
|
end
|
@@ -1,11 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
# hts_expand
|
6
6
|
# hts_expand3
|
7
7
|
# hts_resize
|
8
8
|
|
9
|
+
# hts_log.h
|
10
|
+
attach_function \
|
11
|
+
:hts_get_log_level,
|
12
|
+
[],
|
13
|
+
HtsLogLevel
|
14
|
+
|
9
15
|
attach_function \
|
10
16
|
:hts_lib_shutdown,
|
11
17
|
[],
|
@@ -83,6 +89,12 @@ module HTS
|
|
83
89
|
%i[HFILE string string],
|
84
90
|
HtsFile.by_ref
|
85
91
|
|
92
|
+
# For output streams, flush any buffered data
|
93
|
+
attach_function \
|
94
|
+
:hts_flush,
|
95
|
+
[HtsFile],
|
96
|
+
:int
|
97
|
+
|
86
98
|
# Close a file handle, flushing buffered data for output streams
|
87
99
|
attach_function \
|
88
100
|
:hts_close,
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "sam_funcs"
|
4
|
+
|
5
|
+
module HTS
|
6
|
+
module LibHTS
|
7
|
+
# constants
|
8
|
+
BAM_CMATCH = 0
|
9
|
+
BAM_CINS = 1
|
10
|
+
BAM_CDEL = 2
|
11
|
+
BAM_CREF_SKIP = 3
|
12
|
+
BAM_CSOFT_CLIP = 4
|
13
|
+
BAM_CHARD_CLIP = 5
|
14
|
+
BAM_CPAD = 6
|
15
|
+
BAM_CEQUAL = 7
|
16
|
+
BAM_CDIFF = 8
|
17
|
+
BAM_CBACK = 9
|
18
|
+
|
19
|
+
BAM_CIGAR_STR = "MIDNSHP=XB"
|
20
|
+
BAM_CIGAR_SHIFT = 4
|
21
|
+
BAM_CIGAR_MASK = 0xf
|
22
|
+
BAM_CIGAR_TYPE = 0x3C1A7
|
23
|
+
|
24
|
+
# macros
|
25
|
+
class << self
|
26
|
+
def bam_cigar_op(c)
|
27
|
+
c & BAM_CIGAR_MASK
|
28
|
+
end
|
29
|
+
|
30
|
+
def bam_cigar_oplen(c)
|
31
|
+
c >> BAM_CIGAR_SHIFT
|
32
|
+
end
|
33
|
+
|
34
|
+
def bam_cigar_opchr(c)
|
35
|
+
("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
|
36
|
+
end
|
37
|
+
|
38
|
+
def bam_cigar_gen(l, o)
|
39
|
+
l << BAM_CIGAR_SHIFT | o
|
40
|
+
end
|
41
|
+
|
42
|
+
def bam_cigar_type(o)
|
43
|
+
BAM_CIGAR_TYPE >> (o << 1) & 3
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
BAM_FPAIRED = 1
|
48
|
+
BAM_FPROPER_PAIR = 2
|
49
|
+
BAM_FUNMAP = 4
|
50
|
+
BAM_FMUNMAP = 8
|
51
|
+
BAM_FREVERSE = 16
|
52
|
+
BAM_FMREVERSE = 32
|
53
|
+
BAM_FREAD1 = 64
|
54
|
+
BAM_FREAD2 = 128
|
55
|
+
BAM_FSECONDARY = 256
|
56
|
+
BAM_FQCFAIL = 512
|
57
|
+
BAM_FDUP = 1024
|
58
|
+
BAM_FSUPPLEMENTARY = 2048
|
59
|
+
|
60
|
+
# macros
|
61
|
+
# function-like macros
|
62
|
+
class << self
|
63
|
+
def bam_is_rev(b)
|
64
|
+
b[:core][:flag] & BAM_FREVERSE != 0
|
65
|
+
end
|
66
|
+
|
67
|
+
def bam_is_mrev(b)
|
68
|
+
b[:core][:flag] & BAM_FMREVERSE != 0
|
69
|
+
end
|
70
|
+
|
71
|
+
def bam_get_qname(b)
|
72
|
+
b[:data]
|
73
|
+
end
|
74
|
+
|
75
|
+
def bam_get_cigar(b)
|
76
|
+
b[:data] + b[:core][:l_qname]
|
77
|
+
end
|
78
|
+
|
79
|
+
def bam_get_seq(b)
|
80
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
|
81
|
+
end
|
82
|
+
|
83
|
+
def bam_get_qual(b)
|
84
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
|
85
|
+
end
|
86
|
+
|
87
|
+
def bam_get_aux(b)
|
88
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
|
89
|
+
end
|
90
|
+
|
91
|
+
def bam_get_l_aux(b)
|
92
|
+
b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
|
93
|
+
end
|
94
|
+
|
95
|
+
def bam_seqi(s, i)
|
96
|
+
s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
|
97
|
+
end
|
98
|
+
|
99
|
+
# def bam_set_seqi(s, i, b)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|