htslib 0.0.2 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +50 -22
- data/lib/hts/bam/cigar.rb +11 -6
- data/lib/hts/bam/flag.rb +97 -0
- data/lib/hts/bam/header.rb +17 -7
- data/lib/hts/bam/record.rb +199 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +53 -0
- data/lib/hts/bcf/header.rb +26 -0
- data/lib/hts/bcf/info.rb +94 -0
- data/lib/hts/bcf/record.rb +113 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +126 -47
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +13 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/libhts/sam.rb +102 -0
- data/lib/hts/{ffi/sam.rb → libhts/sam_funcs.rb} +24 -120
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/libhts/vcf.rb +226 -0
- data/lib/hts/{ffi/vcf.rb → libhts/vcf_funcs.rb} +1 -70
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +48 -27
- data/lib/hts/bam/alignment.rb +0 -155
- data/lib/hts/fai.rb +0 -57
- data/lib/hts/ffi.rb +0 -85
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf/header.rb +0 -24
- data/lib/hts/vcf/variant.rb +0 -43
- data/lib/hts/vcf.rb +0 -42
@@ -1,34 +1,34 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
typedef :pointer, :HFILE
|
6
6
|
typedef :int64, :hts_pos_t
|
7
7
|
typedef :pointer, :bam_plp_auto_f
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class KString <
|
11
|
+
class KString < FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
-
class KSeq <
|
18
|
+
class KSeq < FFI::Struct
|
19
19
|
layout \
|
20
20
|
:name, KString,
|
21
21
|
:comment, KString,
|
22
22
|
:seq, KString,
|
23
23
|
:qual, KString,
|
24
24
|
:last_char, :int,
|
25
|
-
:f, :pointer #
|
25
|
+
:f, :pointer # kstream_t
|
26
26
|
end
|
27
27
|
|
28
28
|
# BGZF
|
29
|
-
class BGZF <
|
29
|
+
class BGZF < FFI::BitStruct
|
30
30
|
layout \
|
31
|
-
:
|
31
|
+
:_flags, :uint, # bit_fields
|
32
32
|
:cache_size, :int,
|
33
33
|
:block_length, :int,
|
34
34
|
:block_clength, :int,
|
@@ -44,9 +44,29 @@ module HTS
|
|
44
44
|
:idx_build_otf, :int,
|
45
45
|
:gz_stream, :pointer,
|
46
46
|
:seeked, :int64
|
47
|
+
|
48
|
+
bit_fields :_flags,
|
49
|
+
:errcode, 16,
|
50
|
+
:_reserved, 1,
|
51
|
+
:is_write, 1,
|
52
|
+
:no_eof_block, 1,
|
53
|
+
:is_be, 1,
|
54
|
+
:compress_level, 9,
|
55
|
+
:last_block_eof, 1,
|
56
|
+
:is_compressed, 1,
|
57
|
+
:is_gzip, 1
|
47
58
|
end
|
48
59
|
|
49
60
|
# hts
|
61
|
+
HtsLogLevel = enum(
|
62
|
+
:off, # All logging disabled.
|
63
|
+
:error, # Logging of errors only.
|
64
|
+
:warning, 3, # Logging of errors and warnings.
|
65
|
+
:info, # Logging of errors, warnings, and normal but significant events.
|
66
|
+
:debug, # Logging of all except the most detailed debug events.
|
67
|
+
:trace # All logging enabled.
|
68
|
+
)
|
69
|
+
|
50
70
|
HtsFormatCategory = enum(
|
51
71
|
:unknown_category,
|
52
72
|
:sequence_data, # Sequence data -- SAM, BAM, CRAM, etc
|
@@ -105,7 +125,7 @@ module HTS
|
|
105
125
|
:HTS_OPT_BLOCK_SIZE
|
106
126
|
)
|
107
127
|
|
108
|
-
class HtsFormat <
|
128
|
+
class HtsFormat < FFI::Struct
|
109
129
|
layout \
|
110
130
|
:category, HtsFormatCategory,
|
111
131
|
:format, HtsExactFormat,
|
@@ -119,7 +139,7 @@ module HTS
|
|
119
139
|
:specific, :pointer
|
120
140
|
end
|
121
141
|
|
122
|
-
class HtsIdx <
|
142
|
+
class HtsIdx < FFI::Struct
|
123
143
|
layout \
|
124
144
|
:fmt, :int,
|
125
145
|
:min_shift, :int,
|
@@ -152,7 +172,7 @@ module HTS
|
|
152
172
|
end
|
153
173
|
|
154
174
|
# HtsFile
|
155
|
-
class SamHdr <
|
175
|
+
class SamHdr < FFI::Struct
|
156
176
|
layout \
|
157
177
|
:n_targets, :int32,
|
158
178
|
:ignore_sam_err, :int32,
|
@@ -168,9 +188,9 @@ module HTS
|
|
168
188
|
|
169
189
|
BamHdr = SamHdr
|
170
190
|
|
171
|
-
class HtsFile <
|
191
|
+
class HtsFile < FFI::BitStruct
|
172
192
|
layout \
|
173
|
-
:
|
193
|
+
:_flags, :uint32, # bit_fields
|
174
194
|
:lineno, :int64,
|
175
195
|
:line, KString,
|
176
196
|
:fn, :string,
|
@@ -186,17 +206,25 @@ module HTS
|
|
186
206
|
:idx, HtsIdx.ptr,
|
187
207
|
:fnidx, :string,
|
188
208
|
:bam_header, SamHdr.ptr
|
209
|
+
|
210
|
+
bit_fields :_flags,
|
211
|
+
:is_bin, 1,
|
212
|
+
:is_write, 1,
|
213
|
+
:is_be, 1,
|
214
|
+
:is_cram, 1,
|
215
|
+
:is_bgzf, 1,
|
216
|
+
:dummy, 27
|
189
217
|
end
|
190
218
|
|
191
219
|
SamFile = HtsFile
|
192
220
|
|
193
|
-
class HtsThreadPool <
|
221
|
+
class HtsThreadPool < FFI::Struct
|
194
222
|
layout \
|
195
223
|
:pool, :pointer,
|
196
224
|
:qsize, :int
|
197
225
|
end
|
198
226
|
|
199
|
-
class HtsOpt <
|
227
|
+
class HtsOpt < FFI::Struct
|
200
228
|
layout \
|
201
229
|
:arg, :string,
|
202
230
|
:opt, HtsFmtOption,
|
@@ -208,9 +236,9 @@ module HTS
|
|
208
236
|
:next, HtsOpt.ptr
|
209
237
|
end
|
210
238
|
|
211
|
-
class HtsItr <
|
239
|
+
class HtsItr < FFI::BitStruct
|
212
240
|
layout \
|
213
|
-
:
|
241
|
+
:_flags, :uint32, # bit_fields
|
214
242
|
:tid, :int,
|
215
243
|
:n_off, :int,
|
216
244
|
:i, :int,
|
@@ -235,9 +263,17 @@ module HTS
|
|
235
263
|
:m, :int,
|
236
264
|
:a, :pointer
|
237
265
|
)
|
266
|
+
|
267
|
+
bit_fields :_flags,
|
268
|
+
:read_rest, 1,
|
269
|
+
:finished, 1,
|
270
|
+
:is_cram, 1,
|
271
|
+
:nocoor, 1,
|
272
|
+
:multi, 1,
|
273
|
+
:dummy, 27
|
238
274
|
end
|
239
275
|
|
240
|
-
class Bam1Core <
|
276
|
+
class Bam1Core < FFI::Struct
|
241
277
|
layout \
|
242
278
|
:pos, :hts_pos_t,
|
243
279
|
:tid, :int32,
|
@@ -253,23 +289,54 @@ module HTS
|
|
253
289
|
:isize, :hts_pos_t
|
254
290
|
end
|
255
291
|
|
256
|
-
class Bam1 <
|
292
|
+
class Bam1 < FFI::ManagedStruct
|
257
293
|
layout \
|
258
294
|
:core, Bam1Core,
|
259
295
|
:id, :uint64,
|
260
296
|
:data, :pointer, # uint8_t
|
261
297
|
:l_data, :int,
|
262
298
|
:m_data, :uint32,
|
263
|
-
:
|
299
|
+
:_mempolicy, :uint32 # bit_fields
|
300
|
+
|
301
|
+
# bit_fields :_mempolicy,
|
302
|
+
# :mempolicy, 2,
|
303
|
+
# :_reserved, 30
|
304
|
+
|
305
|
+
def self.release(ptr)
|
306
|
+
LibHTS.bam_destroy1(ptr) unless ptr.null?
|
307
|
+
end
|
264
308
|
end
|
265
309
|
|
266
|
-
|
310
|
+
typedef :pointer, :bam_plp
|
311
|
+
typedef :pointer, :bam_mplp
|
312
|
+
|
313
|
+
class BamPileupCd < FFI::Union
|
314
|
+
layout \
|
315
|
+
:p, :pointer,
|
316
|
+
:i, :int64_t,
|
317
|
+
:f, :double
|
267
318
|
end
|
268
319
|
|
269
|
-
class
|
320
|
+
class BamPileup1 < FFI::BitStruct
|
321
|
+
layout \
|
322
|
+
:b, Bam1.ptr,
|
323
|
+
:qpos, :int32_t,
|
324
|
+
:indel, :int,
|
325
|
+
:level, :int,
|
326
|
+
:_flags, :uint32_t, # bit_fields
|
327
|
+
:cd, BamPileupCd,
|
328
|
+
:cigar_ind, :int
|
329
|
+
|
330
|
+
bit_fields :_flags,
|
331
|
+
:is_del, 1,
|
332
|
+
:is_head, 1,
|
333
|
+
:is_tail, 1,
|
334
|
+
:is_refskip, 1,
|
335
|
+
:_reserved, 1,
|
336
|
+
:aux, 27
|
270
337
|
end
|
271
338
|
|
272
|
-
class TbxConf <
|
339
|
+
class TbxConf < FFI::Struct
|
273
340
|
layout \
|
274
341
|
:preset, :int32,
|
275
342
|
:sc, :int32,
|
@@ -279,7 +346,7 @@ module HTS
|
|
279
346
|
:line_skip, :int32
|
280
347
|
end
|
281
348
|
|
282
|
-
class Tbx <
|
349
|
+
class Tbx < FFI::Struct
|
283
350
|
layout \
|
284
351
|
:conf, TbxConf.ptr,
|
285
352
|
:idx, HtsIdx.ptr,
|
@@ -290,7 +357,7 @@ module HTS
|
|
290
357
|
|
291
358
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
292
359
|
|
293
|
-
class Faidx <
|
360
|
+
class Faidx < FFI::Struct
|
294
361
|
layout :bgzf, BGZF,
|
295
362
|
:n, :int,
|
296
363
|
:m, :int,
|
@@ -299,15 +366,15 @@ module HTS
|
|
299
366
|
:format, FaiFormatOptions
|
300
367
|
end
|
301
368
|
|
302
|
-
#
|
369
|
+
# bcf
|
303
370
|
|
304
|
-
class
|
371
|
+
class BcfVariant < FFI::Struct
|
305
372
|
layout \
|
306
373
|
:type, :int,
|
307
374
|
:n, :int
|
308
375
|
end
|
309
376
|
|
310
|
-
class BcfHrec <
|
377
|
+
class BcfHrec < FFI::Struct
|
311
378
|
layout \
|
312
379
|
:type, :int,
|
313
380
|
:key, :string,
|
@@ -317,7 +384,7 @@ module HTS
|
|
317
384
|
:vals, :pointer
|
318
385
|
end
|
319
386
|
|
320
|
-
class BcfFmt <
|
387
|
+
class BcfFmt < FFI::BitStruct
|
321
388
|
layout \
|
322
389
|
:id, :int,
|
323
390
|
:n, :int,
|
@@ -325,10 +392,14 @@ module HTS
|
|
325
392
|
:type, :int,
|
326
393
|
:p, :pointer, # uint8_t
|
327
394
|
:p_len, :uint32,
|
328
|
-
:
|
395
|
+
:_p_off_free, :uint32 # bit_fields
|
396
|
+
|
397
|
+
bit_fields :_p_off_free,
|
398
|
+
:p_off, 31,
|
399
|
+
:p_free, 1
|
329
400
|
end
|
330
401
|
|
331
|
-
class BcfInfo <
|
402
|
+
class BcfInfo < FFI::BitStruct
|
332
403
|
layout \
|
333
404
|
:key, :int,
|
334
405
|
:type, :int,
|
@@ -339,27 +410,31 @@ module HTS
|
|
339
410
|
),
|
340
411
|
:vptr, :pointer,
|
341
412
|
:vptr_len, :uint32,
|
342
|
-
:
|
413
|
+
:_vptr_off_free, :uint32, # bit_fields
|
343
414
|
:len, :int
|
415
|
+
|
416
|
+
bit_fields :_vptr_off_free,
|
417
|
+
:vptr_off, 31,
|
418
|
+
:vptr_free, 1
|
344
419
|
end
|
345
420
|
|
346
|
-
class BcfIdinfo <
|
421
|
+
class BcfIdinfo < FFI::Struct
|
347
422
|
layout \
|
348
|
-
:info, [:
|
423
|
+
:info, [:uint64_t, 3],
|
349
424
|
:hrec, [BcfHrec.ptr, 3],
|
350
425
|
:id, :int
|
351
426
|
end
|
352
427
|
|
353
|
-
class BcfIdpair <
|
428
|
+
class BcfIdpair < FFI::Struct
|
354
429
|
layout \
|
355
430
|
:key, :string,
|
356
431
|
:val, BcfIdinfo.ptr
|
357
432
|
end
|
358
433
|
|
359
|
-
class BcfHdr <
|
434
|
+
class BcfHdr < FFI::Struct
|
360
435
|
layout \
|
361
436
|
:n, [:int, 3],
|
362
|
-
:id, [
|
437
|
+
:id, [:pointer, 3], # BcfIdpair.ptr
|
363
438
|
:dict, [:pointer, 3],
|
364
439
|
:samples, :pointer,
|
365
440
|
:hrec, :pointer,
|
@@ -373,7 +448,7 @@ module HTS
|
|
373
448
|
:m, [:int, 3]
|
374
449
|
end
|
375
450
|
|
376
|
-
class BcfDec <
|
451
|
+
class BcfDec < FFI::Struct
|
377
452
|
layout \
|
378
453
|
:m_fmt, :int,
|
379
454
|
:m_info, :int,
|
@@ -386,23 +461,23 @@ module HTS
|
|
386
461
|
:id, :string,
|
387
462
|
:als, :pointer, # (\\0-separated string)
|
388
463
|
:allele, :pointer,
|
389
|
-
:info, BcfInfo.ptr,
|
464
|
+
:info, :pointer, # BcfInfo.ptr,
|
390
465
|
:fmt, BcfFmt.ptr,
|
391
|
-
:var,
|
466
|
+
:var, BcfVariant.ptr,
|
392
467
|
:n_var, :int,
|
393
468
|
:var_type, :int,
|
394
469
|
:shared_dirty, :int,
|
395
470
|
:indiv_dirty, :int
|
396
471
|
end
|
397
472
|
|
398
|
-
class Bcf1 <
|
473
|
+
class Bcf1 < FFI::ManagedBitStruct
|
399
474
|
layout \
|
400
475
|
:pos, :hts_pos_t,
|
401
476
|
:rlen, :hts_pos_t,
|
402
477
|
:rid, :int32_t,
|
403
478
|
:qual, :float,
|
404
|
-
:
|
405
|
-
:
|
479
|
+
:_n_info_allele, :uint32_t,
|
480
|
+
:_n_fmt_sample, :uint32_t,
|
406
481
|
:shared, KString,
|
407
482
|
:indiv, KString,
|
408
483
|
:d, BcfDec,
|
@@ -411,13 +486,17 @@ module HTS
|
|
411
486
|
:unpack_size, [:int, 3],
|
412
487
|
:errcode, :int
|
413
488
|
|
414
|
-
|
415
|
-
|
416
|
-
|
489
|
+
bit_fields :_n_info_allele,
|
490
|
+
:n_info, 16,
|
491
|
+
:n_allele, 16
|
492
|
+
|
493
|
+
bit_fields :_n_fmt_sample,
|
494
|
+
:n_fmt, 8,
|
495
|
+
:n_sample, 24
|
417
496
|
|
418
|
-
|
419
|
-
|
420
|
-
|
497
|
+
def self.release(ptr)
|
498
|
+
LibHTS.bcf_destroy(ptr) unless ptr.null?
|
499
|
+
end
|
421
500
|
end
|
422
501
|
end
|
423
502
|
end
|
@@ -1,11 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
# hts_expand
|
6
6
|
# hts_expand3
|
7
7
|
# hts_resize
|
8
8
|
|
9
|
+
# hts_log.h
|
10
|
+
attach_function \
|
11
|
+
:hts_get_log_level,
|
12
|
+
[],
|
13
|
+
HtsLogLevel
|
14
|
+
|
9
15
|
attach_function \
|
10
16
|
:hts_lib_shutdown,
|
11
17
|
[],
|
@@ -83,6 +89,12 @@ module HTS
|
|
83
89
|
%i[HFILE string string],
|
84
90
|
HtsFile.by_ref
|
85
91
|
|
92
|
+
# For output streams, flush any buffered data
|
93
|
+
attach_function \
|
94
|
+
:hts_flush,
|
95
|
+
[HtsFile],
|
96
|
+
:int
|
97
|
+
|
86
98
|
# Close a file handle, flushing buffered data for output streams
|
87
99
|
attach_function \
|
88
100
|
:hts_close,
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "sam_funcs"
|
4
|
+
|
5
|
+
module HTS
|
6
|
+
module LibHTS
|
7
|
+
# constants
|
8
|
+
BAM_CMATCH = 0
|
9
|
+
BAM_CINS = 1
|
10
|
+
BAM_CDEL = 2
|
11
|
+
BAM_CREF_SKIP = 3
|
12
|
+
BAM_CSOFT_CLIP = 4
|
13
|
+
BAM_CHARD_CLIP = 5
|
14
|
+
BAM_CPAD = 6
|
15
|
+
BAM_CEQUAL = 7
|
16
|
+
BAM_CDIFF = 8
|
17
|
+
BAM_CBACK = 9
|
18
|
+
|
19
|
+
BAM_CIGAR_STR = "MIDNSHP=XB"
|
20
|
+
BAM_CIGAR_SHIFT = 4
|
21
|
+
BAM_CIGAR_MASK = 0xf
|
22
|
+
BAM_CIGAR_TYPE = 0x3C1A7
|
23
|
+
|
24
|
+
# macros
|
25
|
+
class << self
|
26
|
+
def bam_cigar_op(c)
|
27
|
+
c & BAM_CIGAR_MASK
|
28
|
+
end
|
29
|
+
|
30
|
+
def bam_cigar_oplen(c)
|
31
|
+
c >> BAM_CIGAR_SHIFT
|
32
|
+
end
|
33
|
+
|
34
|
+
def bam_cigar_opchr(c)
|
35
|
+
("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
|
36
|
+
end
|
37
|
+
|
38
|
+
def bam_cigar_gen(l, o)
|
39
|
+
l << BAM_CIGAR_SHIFT | o
|
40
|
+
end
|
41
|
+
|
42
|
+
def bam_cigar_type(o)
|
43
|
+
BAM_CIGAR_TYPE >> (o << 1) & 3
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
BAM_FPAIRED = 1
|
48
|
+
BAM_FPROPER_PAIR = 2
|
49
|
+
BAM_FUNMAP = 4
|
50
|
+
BAM_FMUNMAP = 8
|
51
|
+
BAM_FREVERSE = 16
|
52
|
+
BAM_FMREVERSE = 32
|
53
|
+
BAM_FREAD1 = 64
|
54
|
+
BAM_FREAD2 = 128
|
55
|
+
BAM_FSECONDARY = 256
|
56
|
+
BAM_FQCFAIL = 512
|
57
|
+
BAM_FDUP = 1024
|
58
|
+
BAM_FSUPPLEMENTARY = 2048
|
59
|
+
|
60
|
+
# macros
|
61
|
+
# function-like macros
|
62
|
+
class << self
|
63
|
+
def bam_is_rev(b)
|
64
|
+
b[:core][:flag] & BAM_FREVERSE != 0
|
65
|
+
end
|
66
|
+
|
67
|
+
def bam_is_mrev(b)
|
68
|
+
b[:core][:flag] & BAM_FMREVERSE != 0
|
69
|
+
end
|
70
|
+
|
71
|
+
def bam_get_qname(b)
|
72
|
+
b[:data]
|
73
|
+
end
|
74
|
+
|
75
|
+
def bam_get_cigar(b)
|
76
|
+
b[:data] + b[:core][:l_qname]
|
77
|
+
end
|
78
|
+
|
79
|
+
def bam_get_seq(b)
|
80
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
|
81
|
+
end
|
82
|
+
|
83
|
+
def bam_get_qual(b)
|
84
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
|
85
|
+
end
|
86
|
+
|
87
|
+
def bam_get_aux(b)
|
88
|
+
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
|
89
|
+
end
|
90
|
+
|
91
|
+
def bam_get_l_aux(b)
|
92
|
+
b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
|
93
|
+
end
|
94
|
+
|
95
|
+
def bam_seqi(s, i)
|
96
|
+
s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
|
97
|
+
end
|
98
|
+
|
99
|
+
# def bam_set_seqi(s, i, b)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|