htslib 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -16
- data/lib/hts/bam.rb +38 -32
- data/lib/hts/bam/cigar.rb +3 -3
- data/lib/hts/bam/flag.rb +91 -0
- data/lib/hts/bam/header.rb +2 -2
- data/lib/hts/bam/{alignment.rb → record.rb} +49 -43
- data/lib/hts/fai.rb +8 -8
- data/lib/hts/libhts.rb +141 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +114 -43
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +7 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +25 -25
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +1 -1
- data/lib/hts/vcf.rb +17 -17
- data/lib/hts/vcf/format.rb +24 -0
- data/lib/hts/vcf/header.rb +2 -2
- data/lib/hts/vcf/info.rb +24 -0
- data/lib/hts/vcf/{variant.rb → record.rb} +2 -2
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +20 -17
- data/lib/hts/ffi.rb +0 -85
data/lib/hts/fai.rb
CHANGED
@@ -18,30 +18,30 @@ module HTS
|
|
18
18
|
|
19
19
|
def initialize(path)
|
20
20
|
@path = File.expand_path(path)
|
21
|
-
@path.delete_suffix!(
|
22
|
-
|
23
|
-
@fai =
|
21
|
+
@path.delete_suffix!(".fai")
|
22
|
+
LibHTS.fai_build(@path) unless File.exist?("#{@path}.fai")
|
23
|
+
@fai = LibHTS.fai_load(@path)
|
24
24
|
raise if @fai.null?
|
25
25
|
|
26
|
-
# at_exit{
|
26
|
+
# at_exit{LibHTS.fai_destroy(@fai)}
|
27
27
|
end
|
28
28
|
|
29
29
|
def close
|
30
|
-
|
30
|
+
LibHTS.fai_destroy(@fai)
|
31
31
|
end
|
32
32
|
|
33
33
|
# the number of sequences in the index.
|
34
34
|
def size
|
35
|
-
|
35
|
+
LibHTS.faidx_nseq(@fai)
|
36
36
|
end
|
37
37
|
alias length size
|
38
38
|
|
39
39
|
# return the length of the requested chromosome.
|
40
40
|
def chrom_size(chrom)
|
41
|
-
raise ArgumentError,
|
41
|
+
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
42
42
|
|
43
43
|
chrom = chrom.to_s
|
44
|
-
result =
|
44
|
+
result = LibHTS.faidx_seq_len(@fai, chrom)
|
45
45
|
result == -1 ? nil : result
|
46
46
|
end
|
47
47
|
alias chrom_length chrom_size
|
data/lib/hts/libhts.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
module LibHTS
|
5
|
+
extend FFI::Library
|
6
|
+
|
7
|
+
begin
|
8
|
+
ffi_lib HTS.lib_path
|
9
|
+
rescue LoadError => e
|
10
|
+
raise LoadError, "#{e}\nCould not find #{HTS.lib_path}"
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.attach_function(*)
|
14
|
+
super
|
15
|
+
rescue FFI::NotFoundError => e
|
16
|
+
warn e.message
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module FFI
|
22
|
+
class Struct
|
23
|
+
class << self
|
24
|
+
# @example HtsOpt
|
25
|
+
# class HtsOpt < FFI::Struct
|
26
|
+
# layout \
|
27
|
+
# :arg, :string,
|
28
|
+
# :opt, HtsFmtOption,
|
29
|
+
# :val,
|
30
|
+
# union_layout(
|
31
|
+
# :i, :int,
|
32
|
+
# :s, :string
|
33
|
+
# ),
|
34
|
+
# :next, HtsOpt.ptr
|
35
|
+
# end
|
36
|
+
|
37
|
+
def union_layout(*args)
|
38
|
+
Class.new(FFI::Union) { layout(*args) }
|
39
|
+
end
|
40
|
+
|
41
|
+
# @example HtsFormat
|
42
|
+
# class HtsFormat < FFI::Struct
|
43
|
+
# layout \
|
44
|
+
# :category, HtsFormatCategory,
|
45
|
+
# :format, HtsExactFormat,
|
46
|
+
# :version,
|
47
|
+
# struct_layout(
|
48
|
+
# :major, :short,
|
49
|
+
# :minor, :short
|
50
|
+
# ),
|
51
|
+
# :compression, HtsCompression,
|
52
|
+
# :compression_level, :short,
|
53
|
+
# :specific, :pointer
|
54
|
+
# end
|
55
|
+
|
56
|
+
def struct_layout(*args)
|
57
|
+
Class.new(FFI::Struct) { layout(*args) }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Struct that supports bit fields.
|
63
|
+
# Currently readonly.
|
64
|
+
class BitStruct < Struct
|
65
|
+
class << self
|
66
|
+
module BitFieldsModule
|
67
|
+
def [](name)
|
68
|
+
bit_fields = self.class.bit_fields_hash_table
|
69
|
+
parent, start, width = bit_fields[name]
|
70
|
+
if parent
|
71
|
+
(super(parent) >> start) & ((1 << width) - 1)
|
72
|
+
else
|
73
|
+
super(name)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
private_constant :BitFieldsModule
|
78
|
+
|
79
|
+
attr_reader :bit_fields_hash_table
|
80
|
+
|
81
|
+
# @example Bcf1
|
82
|
+
# class Bcf1 < FFI::BitStruct
|
83
|
+
# layout \
|
84
|
+
# :pos, :hts_pos_t,
|
85
|
+
# :rlen, :hts_pos_t,
|
86
|
+
# :rid, :int32_t,
|
87
|
+
# :qual, :float,
|
88
|
+
# :_n_info_allele, :uint32_t,
|
89
|
+
# :_n_fmt_sample, :uint32_t,
|
90
|
+
# :shared, KString,
|
91
|
+
# :indiv, KString,
|
92
|
+
# :d, BcfDec,
|
93
|
+
# :max_unpack, :int,
|
94
|
+
# :unpacked, :int,
|
95
|
+
# :unpack_size, [:int, 3],
|
96
|
+
# :errcode, :int
|
97
|
+
#
|
98
|
+
# bit_fields :_n_info_allele,
|
99
|
+
# :n_info, 16,
|
100
|
+
# :n_allele, 16
|
101
|
+
#
|
102
|
+
# bit_fields :_n_fmt_sample,
|
103
|
+
# :n_fmt, 8,
|
104
|
+
# :n_sample, 24
|
105
|
+
# end
|
106
|
+
|
107
|
+
def bit_fields(*args)
|
108
|
+
unless instance_variable_defined?(:@bit_fields_hash_table)
|
109
|
+
@bit_fields_hash_table = {}
|
110
|
+
prepend BitFieldsModule
|
111
|
+
end
|
112
|
+
|
113
|
+
parent = args.shift
|
114
|
+
labels = []
|
115
|
+
widths = []
|
116
|
+
args.each_slice(2) do |l, w|
|
117
|
+
labels << l
|
118
|
+
widths << w
|
119
|
+
end
|
120
|
+
starts = widths.inject([0]) do |result, w|
|
121
|
+
result << (result.last + w)
|
122
|
+
end
|
123
|
+
labels.zip(starts, widths).each do |l, s, w|
|
124
|
+
@bit_fields_hash_table[l] = [parent, s, w]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
require_relative "libhts/constants"
|
132
|
+
|
133
|
+
# roughly alphabetical order (note: sam is loaded before kfunc)
|
134
|
+
require_relative "libhts/bgzf"
|
135
|
+
require_relative "libhts/faidx"
|
136
|
+
require_relative "libhts/hfile"
|
137
|
+
require_relative "libhts/hts"
|
138
|
+
require_relative "libhts/sam"
|
139
|
+
require_relative "libhts/kfunc"
|
140
|
+
require_relative "libhts/tbx"
|
141
|
+
require_relative "libhts/vcf"
|
data/lib/hts/{ffi → libhts}/constants.rb
CHANGED
@@ -1,34 +1,34 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
typedef :pointer, :HFILE
|
6
6
|
typedef :int64, :hts_pos_t
|
7
7
|
typedef :pointer, :bam_plp_auto_f
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class KString <
|
11
|
+
class KString < FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
-
class KSeq <
|
18
|
+
class KSeq < FFI::Struct
|
19
19
|
layout \
|
20
20
|
:name, KString,
|
21
21
|
:comment, KString,
|
22
22
|
:seq, KString,
|
23
23
|
:qual, KString,
|
24
24
|
:last_char, :int,
|
25
|
-
:f, :pointer #
|
25
|
+
:f, :pointer # kstream_t
|
26
26
|
end
|
27
27
|
|
28
28
|
# BGZF
|
29
|
-
class BGZF <
|
29
|
+
class BGZF < FFI::BitStruct
|
30
30
|
layout \
|
31
|
-
:
|
31
|
+
:_flags, :uint, # bit_fields
|
32
32
|
:cache_size, :int,
|
33
33
|
:block_length, :int,
|
34
34
|
:block_clength, :int,
|
@@ -44,9 +44,29 @@ module HTS
|
|
44
44
|
:idx_build_otf, :int,
|
45
45
|
:gz_stream, :pointer,
|
46
46
|
:seeked, :int64
|
47
|
+
|
48
|
+
bit_fields :_flags,
|
49
|
+
:errcode, 16,
|
50
|
+
:_reserved, 1,
|
51
|
+
:is_write, 1,
|
52
|
+
:no_eof_block, 1,
|
53
|
+
:is_be, 1,
|
54
|
+
:compress_level, 9,
|
55
|
+
:last_block_eof, 1,
|
56
|
+
:is_compressed, 1,
|
57
|
+
:is_gzip, 1
|
47
58
|
end
|
48
59
|
|
49
60
|
# hts
|
61
|
+
HtsLogLevel = enum(
|
62
|
+
:off, # All logging disabled.
|
63
|
+
:error, # Logging of errors only.
|
64
|
+
:warning, 3, # Logging of errors and warnings.
|
65
|
+
:info, # Logging of errors, warnings, and normal but significant events.
|
66
|
+
:debug, # Logging of all except the most detailed debug events.
|
67
|
+
:trace # All logging enabled.
|
68
|
+
)
|
69
|
+
|
50
70
|
HtsFormatCategory = enum(
|
51
71
|
:unknown_category,
|
52
72
|
:sequence_data, # Sequence data -- SAM, BAM, CRAM, etc
|
@@ -105,7 +125,7 @@ module HTS
|
|
105
125
|
:HTS_OPT_BLOCK_SIZE
|
106
126
|
)
|
107
127
|
|
108
|
-
class HtsFormat <
|
128
|
+
class HtsFormat < FFI::Struct
|
109
129
|
layout \
|
110
130
|
:category, HtsFormatCategory,
|
111
131
|
:format, HtsExactFormat,
|
@@ -119,7 +139,7 @@ module HTS
|
|
119
139
|
:specific, :pointer
|
120
140
|
end
|
121
141
|
|
122
|
-
class HtsIdx <
|
142
|
+
class HtsIdx < FFI::Struct
|
123
143
|
layout \
|
124
144
|
:fmt, :int,
|
125
145
|
:min_shift, :int,
|
@@ -152,7 +172,7 @@ module HTS
|
|
152
172
|
end
|
153
173
|
|
154
174
|
# HtsFile
|
155
|
-
class SamHdr <
|
175
|
+
class SamHdr < FFI::Struct
|
156
176
|
layout \
|
157
177
|
:n_targets, :int32,
|
158
178
|
:ignore_sam_err, :int32,
|
@@ -168,9 +188,9 @@ module HTS
|
|
168
188
|
|
169
189
|
BamHdr = SamHdr
|
170
190
|
|
171
|
-
class HtsFile <
|
191
|
+
class HtsFile < FFI::BitStruct
|
172
192
|
layout \
|
173
|
-
:
|
193
|
+
:_flags, :uint32, # bit_fields
|
174
194
|
:lineno, :int64,
|
175
195
|
:line, KString,
|
176
196
|
:fn, :string,
|
@@ -186,17 +206,25 @@ module HTS
|
|
186
206
|
:idx, HtsIdx.ptr,
|
187
207
|
:fnidx, :string,
|
188
208
|
:bam_header, SamHdr.ptr
|
209
|
+
|
210
|
+
bit_fields :_flags,
|
211
|
+
:is_bin, 1,
|
212
|
+
:is_write, 1,
|
213
|
+
:is_be, 1,
|
214
|
+
:is_cram, 1,
|
215
|
+
:is_bgzf, 1,
|
216
|
+
:dummy, 27
|
189
217
|
end
|
190
218
|
|
191
219
|
SamFile = HtsFile
|
192
220
|
|
193
|
-
class HtsThreadPool <
|
221
|
+
class HtsThreadPool < FFI::Struct
|
194
222
|
layout \
|
195
223
|
:pool, :pointer,
|
196
224
|
:qsize, :int
|
197
225
|
end
|
198
226
|
|
199
|
-
class HtsOpt <
|
227
|
+
class HtsOpt < FFI::Struct
|
200
228
|
layout \
|
201
229
|
:arg, :string,
|
202
230
|
:opt, HtsFmtOption,
|
@@ -208,9 +236,9 @@ module HTS
|
|
208
236
|
:next, HtsOpt.ptr
|
209
237
|
end
|
210
238
|
|
211
|
-
class HtsItr <
|
239
|
+
class HtsItr < FFI::BitStruct
|
212
240
|
layout \
|
213
|
-
:
|
241
|
+
:_flags, :uint32, # bit_fields
|
214
242
|
:tid, :int,
|
215
243
|
:n_off, :int,
|
216
244
|
:i, :int,
|
@@ -235,9 +263,17 @@ module HTS
|
|
235
263
|
:m, :int,
|
236
264
|
:a, :pointer
|
237
265
|
)
|
266
|
+
|
267
|
+
bit_fields :_flags,
|
268
|
+
:read_rest, 1,
|
269
|
+
:finished, 1,
|
270
|
+
:is_cram, 1,
|
271
|
+
:nocoor, 1,
|
272
|
+
:multi, 1,
|
273
|
+
:dummy, 27
|
238
274
|
end
|
239
275
|
|
240
|
-
class Bam1Core <
|
276
|
+
class Bam1Core < FFI::Struct
|
241
277
|
layout \
|
242
278
|
:pos, :hts_pos_t,
|
243
279
|
:tid, :int32,
|
@@ -253,23 +289,50 @@ module HTS
|
|
253
289
|
:isize, :hts_pos_t
|
254
290
|
end
|
255
291
|
|
256
|
-
class Bam1 <
|
292
|
+
class Bam1 < FFI::Struct
|
257
293
|
layout \
|
258
294
|
:core, Bam1Core,
|
259
295
|
:id, :uint64,
|
260
296
|
:data, :pointer, # uint8_t
|
261
297
|
:l_data, :int,
|
262
298
|
:m_data, :uint32,
|
263
|
-
:
|
299
|
+
:_mempolicy, :uint32 # bit_fields
|
300
|
+
|
301
|
+
# bit_fields :_mempolicy,
|
302
|
+
# :mempolicy, 2,
|
303
|
+
# :_reserved, 30
|
264
304
|
end
|
265
305
|
|
266
|
-
|
306
|
+
typedef :pointer, :bam_plp
|
307
|
+
typedef :pointer, :bam_mplp
|
308
|
+
|
309
|
+
class BamPileupCd < FFI::Union
|
310
|
+
layout \
|
311
|
+
:p, :pointer,
|
312
|
+
:i, :int64_t,
|
313
|
+
:f, :double
|
267
314
|
end
|
268
315
|
|
269
|
-
class
|
316
|
+
class BamPileup1 < FFI::BitStruct
|
317
|
+
layout \
|
318
|
+
:b, Bam1.ptr,
|
319
|
+
:qpos, :int32_t,
|
320
|
+
:indel, :int,
|
321
|
+
:level, :int,
|
322
|
+
:_flags, :uint32_t, # bit_fields
|
323
|
+
:cd, BamPileupCd,
|
324
|
+
:cigar_ind, :int
|
325
|
+
|
326
|
+
bit_fields :_flags,
|
327
|
+
:is_del, 1,
|
328
|
+
:is_head, 1,
|
329
|
+
:is_tail, 1,
|
330
|
+
:is_refskip, 1,
|
331
|
+
:_reserved, 1,
|
332
|
+
:aux, 27
|
270
333
|
end
|
271
334
|
|
272
|
-
class TbxConf <
|
335
|
+
class TbxConf < FFI::Struct
|
273
336
|
layout \
|
274
337
|
:preset, :int32,
|
275
338
|
:sc, :int32,
|
@@ -279,7 +342,7 @@ module HTS
|
|
279
342
|
:line_skip, :int32
|
280
343
|
end
|
281
344
|
|
282
|
-
class Tbx <
|
345
|
+
class Tbx < FFI::Struct
|
283
346
|
layout \
|
284
347
|
:conf, TbxConf.ptr,
|
285
348
|
:idx, HtsIdx.ptr,
|
@@ -290,7 +353,7 @@ module HTS
|
|
290
353
|
|
291
354
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
292
355
|
|
293
|
-
class Faidx <
|
356
|
+
class Faidx < FFI::Struct
|
294
357
|
layout :bgzf, BGZF,
|
295
358
|
:n, :int,
|
296
359
|
:m, :int,
|
@@ -301,13 +364,13 @@ module HTS
|
|
301
364
|
|
302
365
|
# vcf
|
303
366
|
|
304
|
-
class
|
367
|
+
class BcfVariant < FFI::Struct
|
305
368
|
layout \
|
306
369
|
:type, :int,
|
307
370
|
:n, :int
|
308
371
|
end
|
309
372
|
|
310
|
-
class BcfHrec <
|
373
|
+
class BcfHrec < FFI::Struct
|
311
374
|
layout \
|
312
375
|
:type, :int,
|
313
376
|
:key, :string,
|
@@ -317,7 +380,7 @@ module HTS
|
|
317
380
|
:vals, :pointer
|
318
381
|
end
|
319
382
|
|
320
|
-
class BcfFmt <
|
383
|
+
class BcfFmt < FFI::BitStruct
|
321
384
|
layout \
|
322
385
|
:id, :int,
|
323
386
|
:n, :int,
|
@@ -325,10 +388,14 @@ module HTS
|
|
325
388
|
:type, :int,
|
326
389
|
:p, :pointer, # uint8_t
|
327
390
|
:p_len, :uint32,
|
328
|
-
:
|
391
|
+
:_p_off_free, :uint32 # bit_fields
|
392
|
+
|
393
|
+
bit_fields :_p_off_free,
|
394
|
+
:p_off, 31,
|
395
|
+
:p_free, 1
|
329
396
|
end
|
330
397
|
|
331
|
-
class BcfInfo <
|
398
|
+
class BcfInfo < FFI::BitStruct
|
332
399
|
layout \
|
333
400
|
:key, :int,
|
334
401
|
:type, :int,
|
@@ -339,24 +406,28 @@ module HTS
|
|
339
406
|
),
|
340
407
|
:vptr, :pointer,
|
341
408
|
:vptr_len, :uint32,
|
342
|
-
:
|
409
|
+
:_vptr_off_free, :uint32, # bit_fields
|
343
410
|
:len, :int
|
411
|
+
|
412
|
+
bit_fields :_vptr_off_free,
|
413
|
+
:vptr_off, 31,
|
414
|
+
:vptr_free, 1
|
344
415
|
end
|
345
416
|
|
346
|
-
class BcfIdinfo <
|
417
|
+
class BcfIdinfo < FFI::Struct
|
347
418
|
layout \
|
348
419
|
:info, [:uint8, 3],
|
349
420
|
:hrec, [BcfHrec.ptr, 3],
|
350
421
|
:id, :int
|
351
422
|
end
|
352
423
|
|
353
|
-
class BcfIdpair <
|
424
|
+
class BcfIdpair < FFI::Struct
|
354
425
|
layout \
|
355
426
|
:key, :string,
|
356
427
|
:val, BcfIdinfo.ptr
|
357
428
|
end
|
358
429
|
|
359
|
-
class BcfHdr <
|
430
|
+
class BcfHdr < FFI::Struct
|
360
431
|
layout \
|
361
432
|
:n, [:int, 3],
|
362
433
|
:id, [BcfIdpair.ptr, 3],
|
@@ -373,7 +444,7 @@ module HTS
|
|
373
444
|
:m, [:int, 3]
|
374
445
|
end
|
375
446
|
|
376
|
-
class BcfDec <
|
447
|
+
class BcfDec < FFI::Struct
|
377
448
|
layout \
|
378
449
|
:m_fmt, :int,
|
379
450
|
:m_info, :int,
|
@@ -388,21 +459,21 @@ module HTS
|
|
388
459
|
:allele, :pointer,
|
389
460
|
:info, BcfInfo.ptr,
|
390
461
|
:fmt, BcfFmt.ptr,
|
391
|
-
:var,
|
462
|
+
:var, BcfVariant.ptr,
|
392
463
|
:n_var, :int,
|
393
464
|
:var_type, :int,
|
394
465
|
:shared_dirty, :int,
|
395
466
|
:indiv_dirty, :int
|
396
467
|
end
|
397
468
|
|
398
|
-
class Bcf1 <
|
469
|
+
class Bcf1 < FFI::BitStruct
|
399
470
|
layout \
|
400
471
|
:pos, :hts_pos_t,
|
401
472
|
:rlen, :hts_pos_t,
|
402
473
|
:rid, :int32_t,
|
403
474
|
:qual, :float,
|
404
|
-
:
|
405
|
-
:
|
475
|
+
:_n_info_allele, :uint32_t,
|
476
|
+
:_n_fmt_sample, :uint32_t,
|
406
477
|
:shared, KString,
|
407
478
|
:indiv, KString,
|
408
479
|
:d, BcfDec,
|
@@ -411,13 +482,13 @@ module HTS
|
|
411
482
|
:unpack_size, [:int, 3],
|
412
483
|
:errcode, :int
|
413
484
|
|
414
|
-
|
415
|
-
|
416
|
-
|
485
|
+
bit_fields :_n_info_allele,
|
486
|
+
:n_info, 16,
|
487
|
+
:n_allele, 16
|
417
488
|
|
418
|
-
|
419
|
-
|
420
|
-
|
489
|
+
bit_fields :_n_fmt_sample,
|
490
|
+
:n_fmt, 8,
|
491
|
+
:n_sample, 24
|
421
492
|
end
|
422
493
|
end
|
423
494
|
end
|