htslib 0.0.0 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +91 -12
- data/lib/hts/bam/cigar.rb +33 -0
- data/lib/hts/bam/flag.rb +93 -0
- data/lib/hts/bam/header.rb +33 -0
- data/lib/hts/bam/record.rb +176 -0
- data/lib/hts/bam.rb +109 -82
- data/lib/hts/bcf/format.rb +52 -0
- data/lib/hts/bcf/header.rb +19 -0
- data/lib/hts/bcf/info.rb +40 -0
- data/lib/hts/bcf/record.rb +116 -0
- data/lib/hts/bcf.rb +75 -0
- data/lib/hts/faidx.rb +50 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +2 -2
- data/lib/hts/{ffi_constants.rb → libhts/constants.rb} +137 -75
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +2 -2
- data/lib/hts/{ffi → libhts}/hts.rb +9 -3
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +71 -31
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +188 -12
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +20 -0
- data/lib/hts/utils/open_method.rb +17 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +34 -7
- metadata +60 -22
- data/lib/hts/fai.rb +0 -16
- data/lib/hts/ffi/struct.rb +0 -14
- data/lib/hts/ffi.rb +0 -32
- data/lib/hts/tbx.rb +0 -14
- data/lib/hts/vcf.rb +0 -30
@@ -1,24 +1,34 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
typedef :pointer, :HFILE
|
6
6
|
typedef :int64, :hts_pos_t
|
7
7
|
typedef :pointer, :bam_plp_auto_f
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class
|
11
|
+
class KString < FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
+
class KSeq < FFI::Struct
|
19
|
+
layout \
|
20
|
+
:name, KString,
|
21
|
+
:comment, KString,
|
22
|
+
:seq, KString,
|
23
|
+
:qual, KString,
|
24
|
+
:last_char, :int,
|
25
|
+
:f, :pointer # kstream_t
|
26
|
+
end
|
27
|
+
|
18
28
|
# BGZF
|
19
|
-
class BGZF <
|
29
|
+
class BGZF < FFI::BitStruct
|
20
30
|
layout \
|
21
|
-
:
|
31
|
+
:_flags, :uint, # bit_fields
|
22
32
|
:cache_size, :int,
|
23
33
|
:block_length, :int,
|
24
34
|
:block_clength, :int,
|
@@ -34,9 +44,29 @@ module HTS
|
|
34
44
|
:idx_build_otf, :int,
|
35
45
|
:gz_stream, :pointer,
|
36
46
|
:seeked, :int64
|
47
|
+
|
48
|
+
bit_fields :_flags,
|
49
|
+
:errcode, 16,
|
50
|
+
:_reserved, 1,
|
51
|
+
:is_write, 1,
|
52
|
+
:no_eof_block, 1,
|
53
|
+
:is_be, 1,
|
54
|
+
:compress_level, 9,
|
55
|
+
:last_block_eof, 1,
|
56
|
+
:is_compressed, 1,
|
57
|
+
:is_gzip, 1
|
37
58
|
end
|
38
59
|
|
39
60
|
# hts
|
61
|
+
HtsLogLevel = enum(
|
62
|
+
:off, # All logging disabled.
|
63
|
+
:error, # Logging of errors only.
|
64
|
+
:warning, 3, # Logging of errors and warnings.
|
65
|
+
:info, # Logging of errors, warnings, and normal but significant events.
|
66
|
+
:debug, # Logging of all except the most detailed debug events.
|
67
|
+
:trace # All logging enabled.
|
68
|
+
)
|
69
|
+
|
40
70
|
HtsFormatCategory = enum(
|
41
71
|
:unknown_category,
|
42
72
|
:sequence_data, # Sequence data -- SAM, BAM, CRAM, etc
|
@@ -95,7 +125,7 @@ module HTS
|
|
95
125
|
:HTS_OPT_BLOCK_SIZE
|
96
126
|
)
|
97
127
|
|
98
|
-
class HtsFormat <
|
128
|
+
class HtsFormat < FFI::Struct
|
99
129
|
layout \
|
100
130
|
:category, HtsFormatCategory,
|
101
131
|
:format, HtsExactFormat,
|
@@ -109,7 +139,7 @@ module HTS
|
|
109
139
|
:specific, :pointer
|
110
140
|
end
|
111
141
|
|
112
|
-
class HtsIdx <
|
142
|
+
class HtsIdx < FFI::Struct
|
113
143
|
layout \
|
114
144
|
:fmt, :int,
|
115
145
|
:min_shift, :int,
|
@@ -141,7 +171,8 @@ module HTS
|
|
141
171
|
)
|
142
172
|
end
|
143
173
|
|
144
|
-
|
174
|
+
# HtsFile
|
175
|
+
class SamHdr < FFI::Struct
|
145
176
|
layout \
|
146
177
|
:n_targets, :int32,
|
147
178
|
:ignore_sam_err, :int32,
|
@@ -157,11 +188,11 @@ module HTS
|
|
157
188
|
|
158
189
|
BamHdr = SamHdr
|
159
190
|
|
160
|
-
class HtsFile <
|
191
|
+
class HtsFile < FFI::BitStruct
|
161
192
|
layout \
|
162
|
-
:
|
193
|
+
:_flags, :uint32, # bit_fields
|
163
194
|
:lineno, :int64,
|
164
|
-
:line,
|
195
|
+
:line, KString,
|
165
196
|
:fn, :string,
|
166
197
|
:fn_aux, :string,
|
167
198
|
:fp,
|
@@ -175,17 +206,25 @@ module HTS
|
|
175
206
|
:idx, HtsIdx.ptr,
|
176
207
|
:fnidx, :string,
|
177
208
|
:bam_header, SamHdr.ptr
|
209
|
+
|
210
|
+
bit_fields :_flags,
|
211
|
+
:is_bin, 1,
|
212
|
+
:is_write, 1,
|
213
|
+
:is_be, 1,
|
214
|
+
:is_cram, 1,
|
215
|
+
:is_bgzf, 1,
|
216
|
+
:dummy, 27
|
178
217
|
end
|
179
218
|
|
180
219
|
SamFile = HtsFile
|
181
220
|
|
182
|
-
class HtsThreadPool <
|
221
|
+
class HtsThreadPool < FFI::Struct
|
183
222
|
layout \
|
184
223
|
:pool, :pointer,
|
185
224
|
:qsize, :int
|
186
225
|
end
|
187
226
|
|
188
|
-
class HtsOpt <
|
227
|
+
class HtsOpt < FFI::Struct
|
189
228
|
layout \
|
190
229
|
:arg, :string,
|
191
230
|
:opt, HtsFmtOption,
|
@@ -197,9 +236,9 @@ module HTS
|
|
197
236
|
:next, HtsOpt.ptr
|
198
237
|
end
|
199
238
|
|
200
|
-
class HtsItr <
|
239
|
+
class HtsItr < FFI::BitStruct
|
201
240
|
layout \
|
202
|
-
:
|
241
|
+
:_flags, :uint32, # bit_fields
|
203
242
|
:tid, :int,
|
204
243
|
:n_off, :int,
|
205
244
|
:i, :int,
|
@@ -224,8 +263,17 @@ module HTS
|
|
224
263
|
:m, :int,
|
225
264
|
:a, :pointer
|
226
265
|
)
|
266
|
+
|
267
|
+
bit_fields :_flags,
|
268
|
+
:read_rest, 1,
|
269
|
+
:finished, 1,
|
270
|
+
:is_cram, 1,
|
271
|
+
:nocoor, 1,
|
272
|
+
:multi, 1,
|
273
|
+
:dummy, 27
|
227
274
|
end
|
228
|
-
|
275
|
+
|
276
|
+
class Bam1Core < FFI::Struct
|
229
277
|
layout \
|
230
278
|
:pos, :hts_pos_t,
|
231
279
|
:tid, :int32,
|
@@ -241,53 +289,50 @@ module HTS
|
|
241
289
|
:isize, :hts_pos_t
|
242
290
|
end
|
243
291
|
|
244
|
-
class Bam1 <
|
292
|
+
class Bam1 < FFI::Struct
|
245
293
|
layout \
|
246
294
|
:core, Bam1Core,
|
247
295
|
:id, :uint64,
|
248
296
|
:data, :pointer, # uint8_t
|
249
297
|
:l_data, :int,
|
250
298
|
:m_data, :uint32,
|
251
|
-
:
|
299
|
+
:_mempolicy, :uint32 # bit_fields
|
300
|
+
|
301
|
+
# bit_fields :_mempolicy,
|
302
|
+
# :mempolicy, 2,
|
303
|
+
# :_reserved, 30
|
252
304
|
end
|
253
305
|
|
254
|
-
|
306
|
+
typedef :pointer, :bam_plp
|
307
|
+
typedef :pointer, :bam_mplp
|
308
|
+
|
309
|
+
class BamPileupCd < FFI::Union
|
310
|
+
layout \
|
311
|
+
:p, :pointer,
|
312
|
+
:i, :int64_t,
|
313
|
+
:f, :double
|
255
314
|
end
|
256
315
|
|
257
|
-
class
|
316
|
+
class BamPileup1 < FFI::BitStruct
|
317
|
+
layout \
|
318
|
+
:b, Bam1.ptr,
|
319
|
+
:qpos, :int32_t,
|
320
|
+
:indel, :int,
|
321
|
+
:level, :int,
|
322
|
+
:_flags, :uint32_t, # bit_fields
|
323
|
+
:cd, BamPileupCd,
|
324
|
+
:cigar_ind, :int
|
325
|
+
|
326
|
+
bit_fields :_flags,
|
327
|
+
:is_del, 1,
|
328
|
+
:is_head, 1,
|
329
|
+
:is_tail, 1,
|
330
|
+
:is_refskip, 1,
|
331
|
+
:_reserved, 1,
|
332
|
+
:aux, 27
|
258
333
|
end
|
259
334
|
|
260
|
-
|
261
|
-
BAM_CINS = 1
|
262
|
-
BAM_CDEL = 2
|
263
|
-
BAM_CREF_SKIP = 3
|
264
|
-
BAM_CSOFT_CLIP = 4
|
265
|
-
BAM_CHARD_CLIP = 5
|
266
|
-
BAM_CPAD = 6
|
267
|
-
BAM_CEQUAL = 7
|
268
|
-
BAM_CDIFF = 8
|
269
|
-
BAM_CBACK = 9
|
270
|
-
|
271
|
-
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
272
|
-
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
273
|
-
BAM_CIGAR_SHIFT = 4
|
274
|
-
BAM_CIGAR_MASK = 0xf
|
275
|
-
BAM_CIGAR_TYPE = 0x3C1A7
|
276
|
-
|
277
|
-
BAM_FPAIRED = 1
|
278
|
-
BAM_FPROPER_PAIR = 2
|
279
|
-
BAM_FUNMAP = 4
|
280
|
-
BAM_FMUNMAP = 8
|
281
|
-
BAM_FREVERSE = 16
|
282
|
-
BAM_FMREVERSE = 32
|
283
|
-
BAM_FREAD1 = 64
|
284
|
-
BAM_FREAD2 = 128
|
285
|
-
BAM_FSECONDARY = 256
|
286
|
-
BAM_FQCFAIL = 512
|
287
|
-
BAM_FDUP = 1024
|
288
|
-
BAM_FSUPPLEMENTARY = 2048
|
289
|
-
|
290
|
-
class TbxConf < ::FFI::Struct
|
335
|
+
class TbxConf < FFI::Struct
|
291
336
|
layout \
|
292
337
|
:preset, :int32,
|
293
338
|
:sc, :int32,
|
@@ -297,7 +342,7 @@ module HTS
|
|
297
342
|
:line_skip, :int32
|
298
343
|
end
|
299
344
|
|
300
|
-
class Tbx <
|
345
|
+
class Tbx < FFI::Struct
|
301
346
|
layout \
|
302
347
|
:conf, TbxConf.ptr,
|
303
348
|
:idx, HtsIdx.ptr,
|
@@ -308,7 +353,7 @@ module HTS
|
|
308
353
|
|
309
354
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
310
355
|
|
311
|
-
class Faidx <
|
356
|
+
class Faidx < FFI::Struct
|
312
357
|
layout :bgzf, BGZF,
|
313
358
|
:n, :int,
|
314
359
|
:m, :int,
|
@@ -317,15 +362,15 @@ module HTS
|
|
317
362
|
:format, FaiFormatOptions
|
318
363
|
end
|
319
364
|
|
320
|
-
#
|
365
|
+
# bcf
|
321
366
|
|
322
|
-
class
|
367
|
+
class BcfVariant < FFI::Struct
|
323
368
|
layout \
|
324
369
|
:type, :int,
|
325
370
|
:n, :int
|
326
371
|
end
|
327
372
|
|
328
|
-
class BcfHrec <
|
373
|
+
class BcfHrec < FFI::Struct
|
329
374
|
layout \
|
330
375
|
:type, :int,
|
331
376
|
:key, :string,
|
@@ -335,7 +380,7 @@ module HTS
|
|
335
380
|
:vals, :pointer
|
336
381
|
end
|
337
382
|
|
338
|
-
class BcfFmt <
|
383
|
+
class BcfFmt < FFI::BitStruct
|
339
384
|
layout \
|
340
385
|
:id, :int,
|
341
386
|
:n, :int,
|
@@ -343,10 +388,14 @@ module HTS
|
|
343
388
|
:type, :int,
|
344
389
|
:p, :pointer, # uint8_t
|
345
390
|
:p_len, :uint32,
|
346
|
-
:
|
391
|
+
:_p_off_free, :uint32 # bit_fields
|
392
|
+
|
393
|
+
bit_fields :_p_off_free,
|
394
|
+
:p_off, 31,
|
395
|
+
:p_free, 1
|
347
396
|
end
|
348
397
|
|
349
|
-
class BcfInfo <
|
398
|
+
class BcfInfo < FFI::BitStruct
|
350
399
|
layout \
|
351
400
|
:key, :int,
|
352
401
|
:type, :int,
|
@@ -357,41 +406,45 @@ module HTS
|
|
357
406
|
),
|
358
407
|
:vptr, :pointer,
|
359
408
|
:vptr_len, :uint32,
|
360
|
-
:
|
409
|
+
:_vptr_off_free, :uint32, # bit_fields
|
361
410
|
:len, :int
|
411
|
+
|
412
|
+
bit_fields :_vptr_off_free,
|
413
|
+
:vptr_off, 31,
|
414
|
+
:vptr_free, 1
|
362
415
|
end
|
363
416
|
|
364
|
-
class BcfIdinfo <
|
417
|
+
class BcfIdinfo < FFI::Struct
|
365
418
|
layout \
|
366
|
-
:info, [:
|
419
|
+
:info, [:uint64_t, 3],
|
367
420
|
:hrec, [BcfHrec.ptr, 3],
|
368
421
|
:id, :int
|
369
422
|
end
|
370
423
|
|
371
|
-
class BcfIdpair <
|
424
|
+
class BcfIdpair < FFI::Struct
|
372
425
|
layout \
|
373
426
|
:key, :string,
|
374
427
|
:val, BcfIdinfo.ptr
|
375
428
|
end
|
376
429
|
|
377
|
-
class BcfHdr <
|
430
|
+
class BcfHdr < FFI::Struct
|
378
431
|
layout \
|
379
432
|
:n, [:int, 3],
|
380
|
-
:id, [
|
433
|
+
:id, [:pointer, 3], # BcfIdpair.ptr
|
381
434
|
:dict, [:pointer, 3],
|
382
435
|
:samples, :pointer,
|
383
436
|
:hrec, :pointer,
|
384
437
|
:nhrec, :int,
|
385
438
|
:dirty, :int,
|
386
439
|
:ntransl, :int,
|
387
|
-
:transl, :pointer,
|
440
|
+
:transl, [:pointer, 2],
|
388
441
|
:nsamples_ori, :int,
|
389
442
|
:keep_samples, :pointer,
|
390
|
-
:mem,
|
443
|
+
:mem, KString,
|
391
444
|
:m, [:int, 3]
|
392
445
|
end
|
393
446
|
|
394
|
-
class BcfDec <
|
447
|
+
class BcfDec < FFI::Struct
|
395
448
|
layout \
|
396
449
|
:m_fmt, :int,
|
397
450
|
:m_info, :int,
|
@@ -399,34 +452,43 @@ module HTS
|
|
399
452
|
:m_als, :int,
|
400
453
|
:m_allele, :int,
|
401
454
|
:m_flt, :int,
|
455
|
+
:n_flt, :int,
|
402
456
|
:flt, :pointer,
|
403
457
|
:id, :string,
|
404
|
-
:als, :string
|
458
|
+
:als, :pointer, # (\\0-separated string)
|
405
459
|
:allele, :pointer,
|
406
460
|
:info, BcfInfo.ptr,
|
407
461
|
:fmt, BcfFmt.ptr,
|
408
|
-
:var,
|
462
|
+
:var, BcfVariant.ptr,
|
409
463
|
:n_var, :int,
|
410
464
|
:var_type, :int,
|
411
465
|
:shared_dirty, :int,
|
412
466
|
:indiv_dirty, :int
|
413
467
|
end
|
414
468
|
|
415
|
-
class Bcf1 <
|
469
|
+
class Bcf1 < FFI::BitStruct
|
416
470
|
layout \
|
417
471
|
:pos, :hts_pos_t,
|
418
472
|
:rlen, :hts_pos_t,
|
419
|
-
:rid, :
|
473
|
+
:rid, :int32_t,
|
420
474
|
:qual, :float,
|
421
|
-
:
|
422
|
-
:
|
423
|
-
:shared,
|
424
|
-
:indiv,
|
475
|
+
:_n_info_allele, :uint32_t,
|
476
|
+
:_n_fmt_sample, :uint32_t,
|
477
|
+
:shared, KString,
|
478
|
+
:indiv, KString,
|
425
479
|
:d, BcfDec,
|
426
480
|
:max_unpack, :int,
|
427
481
|
:unpacked, :int,
|
428
482
|
:unpack_size, [:int, 3],
|
429
483
|
:errcode, :int
|
484
|
+
|
485
|
+
bit_fields :_n_info_allele,
|
486
|
+
:n_info, 16,
|
487
|
+
:n_allele, 16
|
488
|
+
|
489
|
+
bit_fields :_n_fmt_sample,
|
490
|
+
:n_fmt, 8,
|
491
|
+
:n_sample, 24
|
430
492
|
end
|
431
493
|
end
|
432
494
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
# Open the named file or URL as a stream
|
6
6
|
attach_function \
|
7
7
|
:hopen,
|
@@ -23,7 +23,7 @@ module HTS
|
|
23
23
|
# Append an extension or replace an existing extension
|
24
24
|
attach_function \
|
25
25
|
:haddextension,
|
26
|
-
[
|
26
|
+
[KString, :string, :int, :string],
|
27
27
|
:string
|
28
28
|
|
29
29
|
# Flush (for output streams) and close the stream
|
@@ -1,14 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
# hts_expand
|
6
6
|
# hts_expand3
|
7
7
|
# hts_resize
|
8
8
|
|
9
|
+
# hts_log.h
|
10
|
+
attach_function \
|
11
|
+
:hts_get_log_level,
|
12
|
+
[],
|
13
|
+
HtsLogLevel
|
14
|
+
|
9
15
|
attach_function \
|
10
16
|
:hts_lib_shutdown,
|
11
|
-
[
|
17
|
+
[],
|
12
18
|
:void
|
13
19
|
|
14
20
|
attach_function \
|
@@ -110,7 +116,7 @@ module HTS
|
|
110
116
|
# Read a line (and its \n or \r\n terminator) from a file
|
111
117
|
attach_function \
|
112
118
|
:hts_getline,
|
113
|
-
[HtsFile, :int,
|
119
|
+
[HtsFile, :int, KString],
|
114
120
|
:int
|
115
121
|
|
116
122
|
attach_function \
|