htslib 0.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/lib/hts/bam.rb +85 -0
- data/lib/hts/fai.rb +14 -0
- data/lib/hts/ffi.rb +31 -0
- data/lib/hts/ffi/bgzf.rb +192 -0
- data/lib/hts/ffi/faidx.rb +139 -0
- data/lib/hts/ffi/hfile.rb +81 -0
- data/lib/hts/ffi/hts.rb +324 -0
- data/lib/hts/ffi/kfunc.rb +36 -0
- data/lib/hts/ffi/sam.rb +648 -0
- data/lib/hts/ffi/struct.rb +12 -0
- data/lib/hts/ffi/tbx.rb +68 -0
- data/lib/hts/ffi_constants.rb +430 -0
- data/lib/hts/tbx.rb +12 -0
- data/lib/hts/vcf.rb +28 -0
- data/lib/hts/version.rb +5 -0
- data/lib/htslib.rb +18 -0
- metadata +145 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
# NOTE: This monkey patch should be removed once a better approach is found.
|
2
|
+
module FFI
  # Re-opens FFI::Struct to add two class-level helpers that build an
  # anonymous nested layout class from a field list.
  class Struct
    class << self
      # Builds an anonymous FFI::Union subclass and applies +args+ as its
      # layout.
      #
      # @param args [Array] field name / type list, forwarded to +layout+
      # @return [Class] the new anonymous FFI::Union subclass
      def union_layout(*args)
        Class.new(FFI::Union) do
          layout(*args)
        end
      end

      # Builds an anonymous FFI::Struct subclass and applies +args+ as its
      # layout.
      #
      # @param args [Array] field name / type list, forwarded to +layout+
      # @return [Class] the new anonymous FFI::Struct subclass
      def struct_layout(*args)
        Class.new(FFI::Struct) do
          layout(*args)
        end
      end
    end
  end
end
|
data/lib/hts/ffi/tbx.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module HTS
  # Tabix (tbx) function bindings.
  #
  # Struct classes given as argument types (Tbx, BGZF, HtsFile, TbxConf) are
  # passed to the C side as pointers to those structs.
  module FFI
    # C prototype: int tbx_name2id(tbx_t *tbx, const char *ss);
    attach_function \
      :tbx_name2id,
      [Tbx, :string],
      :int

    # Internal helper function used by tbx_itr_next()
    # C prototype: BGZF *hts_get_bgzfp(htsFile *fp);
    attach_function \
      :hts_get_bgzfp,
      [HtsFile],
      BGZF.by_ref

    # C prototype: int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid,
    #                              hts_pos_t *beg, hts_pos_t *end);
    attach_function \
      :tbx_readrec,
      [BGZF, :pointer, :pointer, :pointer, :pointer, :pointer],
      :int

    # Build an index of the lines in a BGZF-compressed file
    attach_function \
      :tbx_index,
      [BGZF, :int, TbxConf],
      Tbx.by_ref

    # Arguments: file name, min_shift, configuration.
    attach_function \
      :tbx_index_build,
      [:string, :int, TbxConf],
      :int

    # As tbx_index_build, with an explicit index file name.
    attach_function \
      :tbx_index_build2,
      [:string, :string, :int, TbxConf],
      :int

    # As tbx_index_build2, with an additional thread-count argument.
    attach_function \
      :tbx_index_build3,
      [:string, :string, :int, :int, TbxConf],
      :int

    # Load or stream a .tbi or .csi index
    attach_function \
      :tbx_index_load,
      [:string],
      Tbx.by_ref

    # Load or stream a .tbi or .csi index
    attach_function \
      :tbx_index_load2,
      %i[string string],
      Tbx.by_ref

    # Load or stream a .tbi or .csi index
    attach_function \
      :tbx_index_load3,
      %i[string string int],
      Tbx.by_ref

    # C prototype: const char **tbx_seqnames(tbx_t *tbx, int *n);
    #
    # The second argument is an out-parameter (`int *`) that receives the
    # number of names, so it must be a pointer (e.g. an
    # FFI::MemoryPointer.new(:int)), not a plain integer. The returned
    # pointer addresses an array of C strings.
    attach_function \
      :tbx_seqnames,
      [Tbx, :pointer],
      :pointer

    # C prototype: void tbx_destroy(tbx_t *tbx);
    attach_function \
      :tbx_destroy,
      [Tbx],
      :void
  end
end
|
@@ -0,0 +1,430 @@
|
|
1
|
+
module HTS
|
2
|
+
module FFI
|
3
|
+
# Type aliases registered on this module: alias name => underlying FFI type.
{
  HFILE: :pointer,
  hts_pos_t: :int64,
  bam_plp_auto_f: :pointer
}.each do |alias_name, base_type|
  typedef base_type, alias_name
end
|
6
|
+
|
7
|
+
# kstring
|
8
|
+
|
9
|
+
# FFI struct with two size_t members (:l, :m) followed by a C string (:s).
class Kstring < ::FFI::Struct
  layout(
    :l, :size_t,
    :m, :size_t,
    :s, :string
  )
end
|
15
|
+
|
16
|
+
# BGZF
|
17
|
+
# FFI struct describing a BGZF handle.
class BGZF < ::FFI::Struct
  layout(
    :piyo1,                :uint, # FIXME: placeholder field name
    :cache_size,           :int,
    :block_length,         :int,
    :block_clength,        :int,
    :block_offset,         :int,
    :block_address,        :int64,
    :uncompressed_address, :int64,
    :uncompressed_block,   :pointer,
    :compressed_block,     :pointer,
    :cache,                :pointer,
    :fp,                   :HFILE,
    :mt,                   :pointer,
    :idx,                  :pointer,
    :idx_build_otf,        :int,
    :gz_stream,            :pointer,
    :seeked,               :int64
  )
end
|
36
|
+
|
37
|
+
# hts
|
38
|
+
# Broad category of a file's content.
HtsFormatCategory = enum(
  [
    :unknown_category,
    :sequence_data,    # Sequence data -- SAM, BAM, CRAM, etc
    :variant_data,     # Variant calling data -- VCF, BCF, etc
    :index_file,       # Index file associated with some data file
    :region_list,      # Coordinate intervals or regions -- BED, etc
    :category_maximum, 32_767
  ]
)

# Exact file format.
HtsExactFormat = enum(
  [
    :unknown_format,
    :binary_format,
    :text_format,
    :sam, :bam, :bai, :cram, :crai, :vcf, :bcf, :csi, :gzi, :tbi, :bed,
    :htsget,
    :json,
    :empty_format,
    :fasta_format,
    :fastq_format,
    :fai_format,
    :fqi_format,
    :hts_crypt4gh_format,
    :format_maximum, 32_767
  ]
)

# Compression scheme.
HtsCompression = enum(
  [
    :no_compression,
    :gzip,
    :bgzf,
    :custom,
    :compression_maximum, 32_767
  ]
)
|
62
|
+
|
63
|
+
# Format option identifiers. The CRAM_OPT_* values are numbered from zero in
# declaration order; the general-purpose HTS_OPT_* values start at 100.
HtsFmtOption = enum(
  [
    :CRAM_OPT_DECODE_MD,
    :CRAM_OPT_PREFIX,
    :CRAM_OPT_VERBOSITY, # obsolete, use hts_set_log_level() instead
    :CRAM_OPT_SEQS_PER_SLICE,
    :CRAM_OPT_SLICES_PER_CONTAINER,
    :CRAM_OPT_RANGE,
    :CRAM_OPT_VERSION, # rename to :CRAM_version?
    :CRAM_OPT_EMBED_REF,
    :CRAM_OPT_IGNORE_MD5,
    :CRAM_OPT_REFERENCE, # make general
    :CRAM_OPT_MULTI_SEQ_PER_SLICE,
    :CRAM_OPT_NO_REF,
    :CRAM_OPT_USE_BZIP2,
    :CRAM_OPT_SHARED_REF,
    :CRAM_OPT_NTHREADS, # deprecated, use HTS_OPT_NTHREADS
    :CRAM_OPT_THREAD_POOL, # make general
    :CRAM_OPT_USE_LZMA,
    :CRAM_OPT_USE_RANS,
    :CRAM_OPT_REQUIRED_FIELDS,
    :CRAM_OPT_LOSSY_NAMES,
    :CRAM_OPT_BASES_PER_SLICE,
    :CRAM_OPT_STORE_MD,
    :CRAM_OPT_STORE_NM,
    :CRAM_OPT_RANGE_NOSEEK, # CRAM_OPT_RANGE minus the seek

    # General purpose
    :HTS_OPT_COMPRESSION_LEVEL, 100,
    :HTS_OPT_NTHREADS,
    :HTS_OPT_THREAD_POOL,
    :HTS_OPT_CACHE_SIZE,
    :HTS_OPT_BLOCK_SIZE
  ]
)
|
95
|
+
|
96
|
+
# FFI struct describing a file's format: category, exact format, version,
# compression and a format-specific pointer.
class HtsFormat < ::FFI::Struct
  # Nested anonymous struct holding the major/minor version numbers.
  version_layout = struct_layout(
    :major, :short,
    :minor, :short
  )

  layout(
    :category,          HtsFormatCategory,
    :format,            HtsExactFormat,
    :version,           version_layout,
    :compression,       HtsCompression,
    :compression_level, :short,
    :specific,          :pointer
  )
end
|
109
|
+
|
110
|
+
# FFI mirror of htslib's `hts_idx_t` index structure.
#
# The trailing `z` member is a nested *struct* in C (the indexer's internal
# state), so it is declared with `struct_layout`. Declaring it as a union
# made all of its members overlap at offset 0 and under-reported the size of
# the enclosing struct.
class HtsIdx < ::FFI::Struct
  layout \
    :fmt,            :int,
    :min_shift,      :int,
    :n_lvls,         :int,
    :n_bins,         :int,
    :l_meta,         :uint32,
    :n,              :int32,
    :m,              :int32,
    :n_no_coor,      :uint64,
    :bidx,           :pointer,
    :lidx,           :pointer,
    :meta,           :pointer,
    :tbi_n,          :int,
    :last_tbi_tid,   :int,
    :z,
    struct_layout(
      :last_bin,     :uint32,
      :save_bin,     :uint32,
      # hts_pos_t in C (an integer position, not a pointer). Spelled :int64
      # because the anonymous nested layout class may not be able to resolve
      # this module's :hts_pos_t typedef.
      :last_coor,    :int64,
      :last_tid,     :int,
      :save_tid,     :int,
      :finished,     :int,
      :last_off,     :uint64,
      :save_off,     :uint64,
      :off_beg,      :uint64,
      :off_end,      :uint64,
      :n_mapped,     :uint64,
      :n_unmapped,   :uint64
    )
end
|
141
|
+
|
142
|
+
# FFI struct for a SAM/BAM header.
class SamHdr < ::FFI::Struct
  layout(
    :n_targets,      :int32,
    :ignore_sam_err, :int32,
    :l_text,         :size_t,
    :target_len,     :pointer,
    :cigar_tab,      :pointer,
    :target_name,    :pointer,
    :text,           :string,
    :sdict,          :pointer,
    :hrecs,          :pointer,
    :ref_count,      :uint32
  )
end

# Alias: BamHdr refers to the very same class as SamHdr.
BamHdr = SamHdr
|
157
|
+
|
158
|
+
# FFI struct for an open hts file handle.
class HtsFile < ::FFI::Struct
  # Underlying stream: exactly one of these pointer members is meaningful.
  fp_union = union_layout(
    :bgzf,  BGZF.ptr,
    :cram,  :pointer,
    :hfile, :pointer # HFILE
  )

  layout(
    :bitfields,  :uint32, # FIXME
    :lineno,     :int64,
    :line,       Kstring,
    :fn,         :string,
    :fn_aux,     :string,
    :fp,         fp_union,
    :state,      :pointer,
    :format,     HtsFormat,
    :idx,        HtsIdx.ptr,
    :fnidx,      :string,
    :bam_header, SamHdr.ptr
  )
end

# Alias: SamFile refers to the very same class as HtsFile.
SamFile = HtsFile
|
179
|
+
|
180
|
+
# FFI struct pairing a thread-pool pointer with a queue size.
class HtsThreadPool < ::FFI::Struct
  layout(
    :pool,  :pointer,
    :qsize, :int
  )
end
|
185
|
+
|
186
|
+
# FFI struct for one format option; :next points at another HtsOpt, so
# instances form a singly linked list.
class HtsOpt < ::FFI::Struct
  # Option value: either an integer or a C string.
  value_union = union_layout(
    :i, :int,
    :s, :string
  )

  layout(
    :arg,  :string,
    :opt,  HtsFmtOption,
    :val,  value_union,
    :next, HtsOpt.ptr
  )
end
|
197
|
+
|
198
|
+
# FFI mirror of htslib's `hts_itr_t` iterator structure.
#
# The trailing `bins` member is `struct { int n, m; int *a; }` in C, so it is
# declared with `struct_layout`. Declaring it as a union made `n`, `m` and
# `a` overlap and under-reported the size of the iterator.
class HtsItr < ::FFI::Struct
  layout \
    :foo,            :uint32, # FIXME
    :tid,            :int,
    :n_off,          :int,
    :i,              :int,
    :n_reg,          :int,
    :beg,            :int64,
    :end,            :int64,
    :reg_list,       :pointer,
    :curr_tid,       :int,
    :curr_reg,       :int,
    :curr_intv,      :int,
    :curr_beg,       :int64,
    :curr_end,       :int64,
    :curr_off,       :uint64,
    :nocoor_off,     :uint64,
    :off,            :pointer,
    :readrec,        :pointer,
    :seek,           :pointer,
    :tell,           :pointer,
    :bins,
    struct_layout(
      :n,            :int,
      :m,            :int,
      :a,            :pointer
    )
end
|
226
|
+
# FFI struct holding the fixed-size core fields of one alignment record.
class Bam1Core < ::FFI::Struct
  layout(
    :pos,        :hts_pos_t,
    :tid,        :int32,
    :bin,        :uint16,
    :qual,       :uint8,
    :l_extranul, :uint8,
    :flag,       :uint16,
    :l_qname,    :uint16,
    :n_cigar,    :uint32,
    :l_qseq,     :int32,
    :mtid,       :int32,
    :mpos,       :hts_pos_t,
    :isize,      :hts_pos_t
  )
end
|
241
|
+
|
242
|
+
# FFI struct for one alignment record: an embedded Bam1Core followed by the
# data pointer and its bookkeeping fields.
class Bam1 < ::FFI::Struct
  layout(
    :core,      Bam1Core,
    :id,        :uint64,
    :data,      :pointer, # uint8_t
    :l_data,    :int,
    :m_data,    :uint32,
    :mempolicy, :uint32
  )
end
|
251
|
+
|
252
|
+
# Struct class declared without a layout.
class BamPlp < ::FFI::Struct; end

# Struct class declared without a layout.
class BamMplp < ::FFI::Struct; end
|
257
|
+
|
258
|
+
# CIGAR operation codes.
BAM_CMATCH     = 0
BAM_CINS       = 1
BAM_CDEL       = 2
BAM_CREF_SKIP  = 3
BAM_CSOFT_CLIP = 4
BAM_CHARD_CLIP = 5
BAM_CPAD       = 6
BAM_CEQUAL     = 7
BAM_CDIFF      = 8
BAM_CBACK      = 9

# CIGAR helper constants.
BAM_CIGAR_STR        = 'MIDNSHP=XB'.freeze
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'.freeze
BAM_CIGAR_SHIFT      = 4
BAM_CIGAR_MASK       = (1 << BAM_CIGAR_SHIFT) - 1
BAM_CIGAR_TYPE       = 0x3C1A7

# Alignment flag bits, one bit per flag.
BAM_FPAIRED        = 1 << 0
BAM_FPROPER_PAIR   = 1 << 1
BAM_FUNMAP         = 1 << 2
BAM_FMUNMAP        = 1 << 3
BAM_FREVERSE       = 1 << 4
BAM_FMREVERSE      = 1 << 5
BAM_FREAD1         = 1 << 6
BAM_FREAD2         = 1 << 7
BAM_FSECONDARY     = 1 << 8
BAM_FQCFAIL        = 1 << 9
BAM_FDUP           = 1 << 10
BAM_FSUPPLEMENTARY = 1 << 11
|
287
|
+
|
288
|
+
# FFI struct of six int32 tabix configuration values.
class TbxConf < ::FFI::Struct
  layout(
    :preset,    :int32,
    :sc,        :int32,
    :bc,        :int32,
    :ec,        :int32,
    :meta_char, :int32,
    :line_skip, :int32
  )
end
|
297
|
+
|
298
|
+
# FFI mirror of htslib's `tbx_t`:
#
#   typedef struct { tbx_conf_t conf; hts_idx_t *idx; void *dict; } tbx_t;
#
# `conf` is embedded by value in C, so it is declared as a nested TbxConf
# struct rather than a pointer. With a pointer-sized field here, `idx` and
# `dict` were read from the wrong offsets.
class Tbx < ::FFI::Struct
  layout \
    :conf,           TbxConf,
    :idx,            HtsIdx.ptr,
    :dict,           :pointer
end
|
304
|
+
|
305
|
+
# faidx
|
306
|
+
|
307
|
+
# Index format selector (numbered 0, 1, 2 in declaration order).
FaiFormatOptions = enum(%i[FAI_NONE FAI_FASTA FAI_FASTQ])
|
308
|
+
|
309
|
+
# FFI mirror of htslib's `faidx_t`.
#
# In C the first member is `BGZF *bgzf` (a pointer to the handle), so it is
# declared as BGZF.ptr. Embedding the whole BGZF struct by value shifted
# every following field to the wrong offset.
class Faidx < ::FFI::Struct
  layout :bgzf,      BGZF.ptr,
         :n,         :int,
         :m,         :int,
         :name,      :pointer,
         :hash,      :pointer,
         :format,    FaiFormatOptions
end
|
317
|
+
|
318
|
+
# vcf
|
319
|
+
|
320
|
+
# FFI struct of two ints: :type and :n.
class Variant < ::FFI::Struct
  layout(
    :type, :int,
    :n,    :int
  )
end
|
325
|
+
|
326
|
+
# FFI struct for a header record: a type, a key/value string pair, and
# pointers to :nkeys additional keys and values.
class BcfHrec < ::FFI::Struct
  layout(
    :type,  :int,
    :key,   :string,
    :value, :string,
    :nkeys, :int,
    :keys,  :pointer,
    :vals,  :pointer
  )
end
|
335
|
+
|
336
|
+
# FFI struct for a FORMAT field descriptor.
class BcfFmt < ::FFI::Struct
  layout(
    :id,    :int,
    :n,     :int,
    :size,  :int,
    :type,  :int,
    :p,     :pointer, # uint8_t
    :p_len, :uint32,
    :piyo,  :uint32 # FIXME: placeholder field name
  )
end
|
346
|
+
|
347
|
+
# FFI struct for an INFO field descriptor.
class BcfInfo < ::FFI::Struct
  # Single value stored either as a 64-bit integer or as a float.
  scalar_union = union_layout(
    :i, :int64,
    :f, :float
  )

  layout(
    :key,      :int,
    :type,     :int,
    :v1,       scalar_union,
    :vptr,     :pointer,
    :vptr_len, :uint32,
    :piyo,     :uint32, # FIXME: placeholder field name
    :len,      :int
  )
end
|
361
|
+
|
362
|
+
# FFI mirror of htslib's `bcf_idinfo_t`.
#
# `info` is `uint64_t info[3]` in C. Declaring it as three bytes put `hrec`
# and `id` at the wrong offsets.
class BcfIdinfo < ::FFI::Struct
  layout \
    :info,           [:uint64, 3],
    :hrec,           [BcfHrec.ptr, 3],
    :id,             :int
end
|
368
|
+
|
369
|
+
# FFI struct pairing a key string with a pointer to its BcfIdinfo.
class BcfIdpair < ::FFI::Struct
  layout(
    :key, :string,
    :val, BcfIdinfo.ptr
  )
end
|
374
|
+
|
375
|
+
# FFI mirror of htslib's `bcf_hdr_t`.
#
# `transl` is `int *transl[2]` in C (an array of two pointers). Declaring it
# as a single pointer put every following field (`nsamples_ori`,
# `keep_samples`, `mem`, `m`) at the wrong offset.
class BcfHdr < ::FFI::Struct
  layout \
    :n,              [:int, 3],
    :id,             [BcfIdpair.ptr, 3],
    :dict,           [:pointer, 3],
    :samples,        :pointer,
    :hrec,           :pointer,
    :nhrec,          :int,
    :dirty,          :int,
    :ntransl,        :int,
    :transl,         [:pointer, 2],
    :nsamples_ori,   :int,
    :keep_samples,   :pointer,
    :mem,            Kstring,
    :m,              [:int, 3]
end
|
391
|
+
|
392
|
+
# FFI mirror of htslib's `bcf_dec_t`.
#
# The C struct has an `int n_flt` member (number of FILTER fields) between
# `m_flt` and `flt`. It was missing here, which put `flt` and every later
# member at the wrong offset.
class BcfDec < ::FFI::Struct
  layout \
    :m_fmt,          :int,
    :m_info,         :int,
    :m_id,           :int,
    :m_als,          :int,
    :m_allele,       :int,
    :m_flt,          :int,
    :n_flt,          :int,
    :flt,            :pointer,
    :id,             :string,
    :als,            :string,
    :allele,         :pointer,
    :info,           BcfInfo.ptr,
    :fmt,            BcfFmt.ptr,
    :var,            Variant.ptr,
    :n_var,          :int,
    :var_type,       :int,
    :shared_dirty,   :int,
    :indiv_dirty,    :int
end
|
412
|
+
|
413
|
+
# FFI struct for one VCF/BCF record; embeds two Kstring buffers and a BcfDec
# by value.
class Bcf1 < ::FFI::Struct
  layout(
    :pos,         :hts_pos_t,
    :rlen,        :hts_pos_t,
    :rid,         :int,
    :qual,        :float,
    :piyo,        :int, # FIXME: placeholder field name
    :fuga,        :int, # FIXME: placeholder field name
    :shared,      Kstring,
    :indiv,       Kstring,
    :d,           BcfDec,
    :max_unpack,  :int,
    :unpacked,    :int,
    :unpack_size, [:int, 3],
    :errcode,     :int
  )
end
|
429
|
+
end
|
430
|
+
end
|