htslib 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +25 -16
- data/lib/hts/bam.rb +38 -32
- data/lib/hts/bam/cigar.rb +3 -3
- data/lib/hts/bam/flag.rb +91 -0
- data/lib/hts/bam/header.rb +2 -2
- data/lib/hts/bam/{alignment.rb → record.rb} +49 -43
- data/lib/hts/fai.rb +8 -8
- data/lib/hts/libhts.rb +141 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +114 -43
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +7 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +25 -25
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +1 -1
- data/lib/hts/vcf.rb +17 -17
- data/lib/hts/vcf/format.rb +24 -0
- data/lib/hts/vcf/header.rb +2 -2
- data/lib/hts/vcf/info.rb +24 -0
- data/lib/hts/vcf/{variant.rb → record.rb} +2 -2
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +20 -17
- data/lib/hts/ffi.rb +0 -85
data/lib/hts/fai.rb
CHANGED
@@ -18,30 +18,30 @@ module HTS
|
|
18
18
|
|
19
19
|
def initialize(path)
|
20
20
|
@path = File.expand_path(path)
|
21
|
-
@path.delete_suffix!(
|
22
|
-
|
23
|
-
@fai =
|
21
|
+
@path.delete_suffix!(".fai")
|
22
|
+
LibHTS.fai_build(@path) unless File.exist?("#{@path}.fai")
|
23
|
+
@fai = LibHTS.fai_load(@path)
|
24
24
|
raise if @fai.null?
|
25
25
|
|
26
|
-
# at_exit{
|
26
|
+
# at_exit{LibHTS.fai_destroy(@fai)}
|
27
27
|
end
|
28
28
|
|
29
29
|
def close
|
30
|
-
|
30
|
+
LibHTS.fai_destroy(@fai)
|
31
31
|
end
|
32
32
|
|
33
33
|
# the number of sequences in the index.
|
34
34
|
def size
|
35
|
-
|
35
|
+
LibHTS.faidx_nseq(@fai)
|
36
36
|
end
|
37
37
|
alias length size
|
38
38
|
|
39
39
|
# return the length of the requested chromosome.
|
40
40
|
def chrom_size(chrom)
|
41
|
-
raise ArgumentError,
|
41
|
+
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
42
42
|
|
43
43
|
chrom = chrom.to_s
|
44
|
-
result =
|
44
|
+
result = LibHTS.faidx_seq_len(@fai, chrom)
|
45
45
|
result == -1 ? nil : result
|
46
46
|
end
|
47
47
|
alias chrom_length chrom_size
|
data/lib/hts/libhts.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
module LibHTS
|
5
|
+
extend FFI::Library
|
6
|
+
|
7
|
+
begin
|
8
|
+
ffi_lib HTS.lib_path
|
9
|
+
rescue LoadError => e
|
10
|
+
raise LoadError, "#{e}\nCould not find #{HTS.lib_path}"
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.attach_function(*)
|
14
|
+
super
|
15
|
+
rescue FFI::NotFoundError => e
|
16
|
+
warn e.message
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module FFI
|
22
|
+
class Struct
|
23
|
+
class << self
|
24
|
+
# @example HtsOpt
|
25
|
+
# class HtsOpt < FFI::Struct
|
26
|
+
# layout \
|
27
|
+
# :arg, :string,
|
28
|
+
# :opt, HtsFmtOption,
|
29
|
+
# :val,
|
30
|
+
# union_layout(
|
31
|
+
# :i, :int,
|
32
|
+
# :s, :string
|
33
|
+
# ),
|
34
|
+
# :next, HtsOpt.ptr
|
35
|
+
# end
|
36
|
+
|
37
|
+
def union_layout(*args)
|
38
|
+
Class.new(FFI::Union) { layout(*args) }
|
39
|
+
end
|
40
|
+
|
41
|
+
# @example HtsFormat
|
42
|
+
# class HtsFormat < FFI::Struct
|
43
|
+
# layout \
|
44
|
+
# :category, HtsFormatCategory,
|
45
|
+
# :format, HtsExactFormat,
|
46
|
+
# :version,
|
47
|
+
# struct_layout(
|
48
|
+
# :major, :short,
|
49
|
+
# :minor, :short
|
50
|
+
# ),
|
51
|
+
# :compression, HtsCompression,
|
52
|
+
# :compression_level, :short,
|
53
|
+
# :specific, :pointer
|
54
|
+
# end
|
55
|
+
|
56
|
+
def struct_layout(*args)
|
57
|
+
Class.new(FFI::Struct) { layout(*args) }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Struct that support bit fields.
|
63
|
+
# Currently readonly.
|
64
|
+
class BitStruct < Struct
|
65
|
+
class << self
|
66
|
+
module BitFieldsModule
|
67
|
+
def [](name)
|
68
|
+
bit_fields = self.class.bit_fields_hash_table
|
69
|
+
parent, start, width = bit_fields[name]
|
70
|
+
if parent
|
71
|
+
(super(parent) >> start) & ((1 << width) - 1)
|
72
|
+
else
|
73
|
+
super(name)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
private_constant :BitFieldsModule
|
78
|
+
|
79
|
+
attr_reader :bit_fields_hash_table
|
80
|
+
|
81
|
+
# @example Bcf1
|
82
|
+
# class Bcf1 < FFI::BitStruct
|
83
|
+
# layout \
|
84
|
+
# :pos, :hts_pos_t,
|
85
|
+
# :rlen, :hts_pos_t,
|
86
|
+
# :rid, :int32_t,
|
87
|
+
# :qual, :float,
|
88
|
+
# :_n_info_allele, :uint32_t,
|
89
|
+
# :_n_fmt_sample, :uint32_t,
|
90
|
+
# :shared, KString,
|
91
|
+
# :indiv, KString,
|
92
|
+
# :d, BcfDec,
|
93
|
+
# :max_unpack, :int,
|
94
|
+
# :unpacked, :int,
|
95
|
+
# :unpack_size, [:int, 3],
|
96
|
+
# :errcode, :int
|
97
|
+
#
|
98
|
+
# bit_fields :_n_info_allele,
|
99
|
+
# :n_info, 16,
|
100
|
+
# :n_allele, 16
|
101
|
+
#
|
102
|
+
# bit_fields :_n_fmt_sample,
|
103
|
+
# :n_fmt, 8,
|
104
|
+
# :n_sample, 24
|
105
|
+
# end
|
106
|
+
|
107
|
+
def bit_fields(*args)
|
108
|
+
unless instance_variable_defined?(:@bit_fields_hash_table)
|
109
|
+
@bit_fields_hash_table = {}
|
110
|
+
prepend BitFieldsModule
|
111
|
+
end
|
112
|
+
|
113
|
+
parent = args.shift
|
114
|
+
labels = []
|
115
|
+
widths = []
|
116
|
+
args.each_slice(2) do |l, w|
|
117
|
+
labels << l
|
118
|
+
widths << w
|
119
|
+
end
|
120
|
+
starts = widths.inject([0]) do |result, w|
|
121
|
+
result << (result.last + w)
|
122
|
+
end
|
123
|
+
labels.zip(starts, widths).each do |l, s, w|
|
124
|
+
@bit_fields_hash_table[l] = [parent, s, w]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
require_relative "libhts/constants"
|
132
|
+
|
133
|
+
# alphabetical order
|
134
|
+
require_relative "libhts/bgzf"
|
135
|
+
require_relative "libhts/faidx"
|
136
|
+
require_relative "libhts/hfile"
|
137
|
+
require_relative "libhts/hts"
|
138
|
+
require_relative "libhts/sam"
|
139
|
+
require_relative "libhts/kfunc"
|
140
|
+
require_relative "libhts/tbx"
|
141
|
+
require_relative "libhts/vcf"
|
@@ -1,34 +1,34 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
4
|
+
module LibHTS
|
5
5
|
typedef :pointer, :HFILE
|
6
6
|
typedef :int64, :hts_pos_t
|
7
7
|
typedef :pointer, :bam_plp_auto_f
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class KString <
|
11
|
+
class KString < FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
-
class KSeq <
|
18
|
+
class KSeq < FFI::Struct
|
19
19
|
layout \
|
20
20
|
:name, KString,
|
21
21
|
:comment, KString,
|
22
22
|
:seq, KString,
|
23
23
|
:qual, KString,
|
24
24
|
:last_char, :int,
|
25
|
-
:f, :pointer #
|
25
|
+
:f, :pointer # kstream_t
|
26
26
|
end
|
27
27
|
|
28
28
|
# BGZF
|
29
|
-
class BGZF <
|
29
|
+
class BGZF < FFI::BitStruct
|
30
30
|
layout \
|
31
|
-
:
|
31
|
+
:_flags, :uint, # bit_fields
|
32
32
|
:cache_size, :int,
|
33
33
|
:block_length, :int,
|
34
34
|
:block_clength, :int,
|
@@ -44,9 +44,29 @@ module HTS
|
|
44
44
|
:idx_build_otf, :int,
|
45
45
|
:gz_stream, :pointer,
|
46
46
|
:seeked, :int64
|
47
|
+
|
48
|
+
bit_fields :_flags,
|
49
|
+
:errcode, 16,
|
50
|
+
:_reserved, 1,
|
51
|
+
:is_write, 1,
|
52
|
+
:no_eof_block, 1,
|
53
|
+
:is_be, 1,
|
54
|
+
:compress_level, 9,
|
55
|
+
:last_block_eof, 1,
|
56
|
+
:is_compressed, 1,
|
57
|
+
:is_gzip, 1
|
47
58
|
end
|
48
59
|
|
49
60
|
# hts
|
61
|
+
HtsLogLevel = enum(
|
62
|
+
:off, # All logging disabled.
|
63
|
+
:error, # Logging of errors only.
|
64
|
+
:warning, 3, # Logging of errors and warnings.
|
65
|
+
:info, # Logging of errors, warnings, and normal but significant events.
|
66
|
+
:debug, # Logging of all except the most detailed debug events.
|
67
|
+
:trace # All logging enabled.
|
68
|
+
)
|
69
|
+
|
50
70
|
HtsFormatCategory = enum(
|
51
71
|
:unknown_category,
|
52
72
|
:sequence_data, # Sequence data -- SAM, BAM, CRAM, etc
|
@@ -105,7 +125,7 @@ module HTS
|
|
105
125
|
:HTS_OPT_BLOCK_SIZE
|
106
126
|
)
|
107
127
|
|
108
|
-
class HtsFormat <
|
128
|
+
class HtsFormat < FFI::Struct
|
109
129
|
layout \
|
110
130
|
:category, HtsFormatCategory,
|
111
131
|
:format, HtsExactFormat,
|
@@ -119,7 +139,7 @@ module HTS
|
|
119
139
|
:specific, :pointer
|
120
140
|
end
|
121
141
|
|
122
|
-
class HtsIdx <
|
142
|
+
class HtsIdx < FFI::Struct
|
123
143
|
layout \
|
124
144
|
:fmt, :int,
|
125
145
|
:min_shift, :int,
|
@@ -152,7 +172,7 @@ module HTS
|
|
152
172
|
end
|
153
173
|
|
154
174
|
# HtsFile
|
155
|
-
class SamHdr <
|
175
|
+
class SamHdr < FFI::Struct
|
156
176
|
layout \
|
157
177
|
:n_targets, :int32,
|
158
178
|
:ignore_sam_err, :int32,
|
@@ -168,9 +188,9 @@ module HTS
|
|
168
188
|
|
169
189
|
BamHdr = SamHdr
|
170
190
|
|
171
|
-
class HtsFile <
|
191
|
+
class HtsFile < FFI::BitStruct
|
172
192
|
layout \
|
173
|
-
:
|
193
|
+
:_flags, :uint32, # bit_fields
|
174
194
|
:lineno, :int64,
|
175
195
|
:line, KString,
|
176
196
|
:fn, :string,
|
@@ -186,17 +206,25 @@ module HTS
|
|
186
206
|
:idx, HtsIdx.ptr,
|
187
207
|
:fnidx, :string,
|
188
208
|
:bam_header, SamHdr.ptr
|
209
|
+
|
210
|
+
bit_fields :_flags,
|
211
|
+
:is_bin, 1,
|
212
|
+
:is_write, 1,
|
213
|
+
:is_be, 1,
|
214
|
+
:is_cram, 1,
|
215
|
+
:is_bgzf, 1,
|
216
|
+
:dummy, 27
|
189
217
|
end
|
190
218
|
|
191
219
|
SamFile = HtsFile
|
192
220
|
|
193
|
-
class HtsThreadPool <
|
221
|
+
class HtsThreadPool < FFI::Struct
|
194
222
|
layout \
|
195
223
|
:pool, :pointer,
|
196
224
|
:qsize, :int
|
197
225
|
end
|
198
226
|
|
199
|
-
class HtsOpt <
|
227
|
+
class HtsOpt < FFI::Struct
|
200
228
|
layout \
|
201
229
|
:arg, :string,
|
202
230
|
:opt, HtsFmtOption,
|
@@ -208,9 +236,9 @@ module HTS
|
|
208
236
|
:next, HtsOpt.ptr
|
209
237
|
end
|
210
238
|
|
211
|
-
class HtsItr <
|
239
|
+
class HtsItr < FFI::BitStruct
|
212
240
|
layout \
|
213
|
-
:
|
241
|
+
:_flags, :uint32, # bit_fields
|
214
242
|
:tid, :int,
|
215
243
|
:n_off, :int,
|
216
244
|
:i, :int,
|
@@ -235,9 +263,17 @@ module HTS
|
|
235
263
|
:m, :int,
|
236
264
|
:a, :pointer
|
237
265
|
)
|
266
|
+
|
267
|
+
bit_fields :_flags,
|
268
|
+
:read_rest, 1,
|
269
|
+
:finished, 1,
|
270
|
+
:is_cram, 1,
|
271
|
+
:nocoor, 1,
|
272
|
+
:multi, 1,
|
273
|
+
:dummy, 27
|
238
274
|
end
|
239
275
|
|
240
|
-
class Bam1Core <
|
276
|
+
class Bam1Core < FFI::Struct
|
241
277
|
layout \
|
242
278
|
:pos, :hts_pos_t,
|
243
279
|
:tid, :int32,
|
@@ -253,23 +289,50 @@ module HTS
|
|
253
289
|
:isize, :hts_pos_t
|
254
290
|
end
|
255
291
|
|
256
|
-
class Bam1 <
|
292
|
+
class Bam1 < FFI::Struct
|
257
293
|
layout \
|
258
294
|
:core, Bam1Core,
|
259
295
|
:id, :uint64,
|
260
296
|
:data, :pointer, # uint8_t
|
261
297
|
:l_data, :int,
|
262
298
|
:m_data, :uint32,
|
263
|
-
:
|
299
|
+
:_mempolicy, :uint32 # bit_fields
|
300
|
+
|
301
|
+
# bit_fields :_mempolicy,
|
302
|
+
# :mempolicy, 2,
|
303
|
+
# :_reserved, 30
|
264
304
|
end
|
265
305
|
|
266
|
-
|
306
|
+
typedef :pointer, :bam_plp
|
307
|
+
typedef :pointer, :bam_mplp
|
308
|
+
|
309
|
+
class BamPileupCd < FFI::Union
|
310
|
+
layout \
|
311
|
+
:p, :pointer,
|
312
|
+
:i, :int64_t,
|
313
|
+
:f, :double
|
267
314
|
end
|
268
315
|
|
269
|
-
class
|
316
|
+
class BamPileup1 < FFI::BitStruct
|
317
|
+
layout \
|
318
|
+
:b, Bam1.ptr,
|
319
|
+
:qpos, :int32_t,
|
320
|
+
:indel, :int,
|
321
|
+
:level, :int,
|
322
|
+
:_flags, :uint32_t, # bit_fields
|
323
|
+
:cd, BamPileupCd,
|
324
|
+
:cigar_ind, :int
|
325
|
+
|
326
|
+
bit_fields :_flags,
|
327
|
+
:is_del, 1,
|
328
|
+
:is_head, 1,
|
329
|
+
:is_tail, 1,
|
330
|
+
:is_refskip, 1,
|
331
|
+
:_reserved, 1,
|
332
|
+
:aux, 27
|
270
333
|
end
|
271
334
|
|
272
|
-
class TbxConf <
|
335
|
+
class TbxConf < FFI::Struct
|
273
336
|
layout \
|
274
337
|
:preset, :int32,
|
275
338
|
:sc, :int32,
|
@@ -279,7 +342,7 @@ module HTS
|
|
279
342
|
:line_skip, :int32
|
280
343
|
end
|
281
344
|
|
282
|
-
class Tbx <
|
345
|
+
class Tbx < FFI::Struct
|
283
346
|
layout \
|
284
347
|
:conf, TbxConf.ptr,
|
285
348
|
:idx, HtsIdx.ptr,
|
@@ -290,7 +353,7 @@ module HTS
|
|
290
353
|
|
291
354
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
292
355
|
|
293
|
-
class Faidx <
|
356
|
+
class Faidx < FFI::Struct
|
294
357
|
layout :bgzf, BGZF,
|
295
358
|
:n, :int,
|
296
359
|
:m, :int,
|
@@ -301,13 +364,13 @@ module HTS
|
|
301
364
|
|
302
365
|
# vcf
|
303
366
|
|
304
|
-
class
|
367
|
+
class BcfVariant < FFI::Struct
|
305
368
|
layout \
|
306
369
|
:type, :int,
|
307
370
|
:n, :int
|
308
371
|
end
|
309
372
|
|
310
|
-
class BcfHrec <
|
373
|
+
class BcfHrec < FFI::Struct
|
311
374
|
layout \
|
312
375
|
:type, :int,
|
313
376
|
:key, :string,
|
@@ -317,7 +380,7 @@ module HTS
|
|
317
380
|
:vals, :pointer
|
318
381
|
end
|
319
382
|
|
320
|
-
class BcfFmt <
|
383
|
+
class BcfFmt < FFI::BitStruct
|
321
384
|
layout \
|
322
385
|
:id, :int,
|
323
386
|
:n, :int,
|
@@ -325,10 +388,14 @@ module HTS
|
|
325
388
|
:type, :int,
|
326
389
|
:p, :pointer, # uint8_t
|
327
390
|
:p_len, :uint32,
|
328
|
-
:
|
391
|
+
:_p_off_free, :uint32 # bit_fields
|
392
|
+
|
393
|
+
bit_fields :_p_off_free,
|
394
|
+
:p_off, 31,
|
395
|
+
:p_free, 1
|
329
396
|
end
|
330
397
|
|
331
|
-
class BcfInfo <
|
398
|
+
class BcfInfo < FFI::BitStruct
|
332
399
|
layout \
|
333
400
|
:key, :int,
|
334
401
|
:type, :int,
|
@@ -339,24 +406,28 @@ module HTS
|
|
339
406
|
),
|
340
407
|
:vptr, :pointer,
|
341
408
|
:vptr_len, :uint32,
|
342
|
-
:
|
409
|
+
:_vptr_off_free, :uint32, # bit_fields
|
343
410
|
:len, :int
|
411
|
+
|
412
|
+
bit_fields :_vptr_off_free,
|
413
|
+
:vptr_off, 31,
|
414
|
+
:vptr_free, 1
|
344
415
|
end
|
345
416
|
|
346
|
-
class BcfIdinfo <
|
417
|
+
class BcfIdinfo < FFI::Struct
|
347
418
|
layout \
|
348
419
|
:info, [:uint8, 3],
|
349
420
|
:hrec, [BcfHrec.ptr, 3],
|
350
421
|
:id, :int
|
351
422
|
end
|
352
423
|
|
353
|
-
class BcfIdpair <
|
424
|
+
class BcfIdpair < FFI::Struct
|
354
425
|
layout \
|
355
426
|
:key, :string,
|
356
427
|
:val, BcfIdinfo.ptr
|
357
428
|
end
|
358
429
|
|
359
|
-
class BcfHdr <
|
430
|
+
class BcfHdr < FFI::Struct
|
360
431
|
layout \
|
361
432
|
:n, [:int, 3],
|
362
433
|
:id, [BcfIdpair.ptr, 3],
|
@@ -373,7 +444,7 @@ module HTS
|
|
373
444
|
:m, [:int, 3]
|
374
445
|
end
|
375
446
|
|
376
|
-
class BcfDec <
|
447
|
+
class BcfDec < FFI::Struct
|
377
448
|
layout \
|
378
449
|
:m_fmt, :int,
|
379
450
|
:m_info, :int,
|
@@ -388,21 +459,21 @@ module HTS
|
|
388
459
|
:allele, :pointer,
|
389
460
|
:info, BcfInfo.ptr,
|
390
461
|
:fmt, BcfFmt.ptr,
|
391
|
-
:var,
|
462
|
+
:var, BcfVariant.ptr,
|
392
463
|
:n_var, :int,
|
393
464
|
:var_type, :int,
|
394
465
|
:shared_dirty, :int,
|
395
466
|
:indiv_dirty, :int
|
396
467
|
end
|
397
468
|
|
398
|
-
class Bcf1 <
|
469
|
+
class Bcf1 < FFI::BitStruct
|
399
470
|
layout \
|
400
471
|
:pos, :hts_pos_t,
|
401
472
|
:rlen, :hts_pos_t,
|
402
473
|
:rid, :int32_t,
|
403
474
|
:qual, :float,
|
404
|
-
:
|
405
|
-
:
|
475
|
+
:_n_info_allele, :uint32_t,
|
476
|
+
:_n_fmt_sample, :uint32_t,
|
406
477
|
:shared, KString,
|
407
478
|
:indiv, KString,
|
408
479
|
:d, BcfDec,
|
@@ -411,13 +482,13 @@ module HTS
|
|
411
482
|
:unpack_size, [:int, 3],
|
412
483
|
:errcode, :int
|
413
484
|
|
414
|
-
|
415
|
-
|
416
|
-
|
485
|
+
bit_fields :_n_info_allele,
|
486
|
+
:n_info, 16,
|
487
|
+
:n_allele, 16
|
417
488
|
|
418
|
-
|
419
|
-
|
420
|
-
|
489
|
+
bit_fields :_n_fmt_sample,
|
490
|
+
:n_fmt, 8,
|
491
|
+
:n_sample, 24
|
421
492
|
end
|
422
493
|
end
|
423
494
|
end
|