htslib 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +67 -0
- data/lib/hts/bam/auxi.rb +329 -2
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +269 -28
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +110 -73
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +41 -3
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +29 -2
- data/lib/hts/version.rb +1 -1
- metadata +3 -3
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/bcf.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "../htslib"
|
|
4
4
|
|
|
5
5
|
require_relative "hts"
|
|
6
|
+
require_relative "bcf/errors"
|
|
6
7
|
require_relative "bcf/header"
|
|
7
8
|
require_relative "bcf/info"
|
|
8
9
|
require_relative "bcf/format"
|
|
@@ -27,8 +28,27 @@ module HTS
|
|
|
27
28
|
file
|
|
28
29
|
end
|
|
29
30
|
|
|
31
|
+
def self.build_index(file_name, index_name = nil, min_shift = 14, threads = 0, verbose = true)
|
|
32
|
+
if verbose
|
|
33
|
+
if index_name
|
|
34
|
+
warn "Create index for #{file_name} to #{index_name}"
|
|
35
|
+
else
|
|
36
|
+
warn "Create index for #{file_name}"
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
case LibHTS.bcf_index_build3(file_name, index_name, min_shift, threads)
|
|
41
|
+
when 0 # successful
|
|
42
|
+
when -1 then raise IndexError, "Indexing failed for #{file_name}"
|
|
43
|
+
when -2 then raise IndexError, "Opening #{file_name} failed while building the index"
|
|
44
|
+
when -3 then raise IndexError, "#{file_name} is not in an indexable format"
|
|
45
|
+
when -4 then raise IndexError, "Failed to create or save the index for #{file_name}"
|
|
46
|
+
else raise IndexError, "Unknown index build error for #{file_name}"
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
30
50
|
def initialize(file_name, mode = "r", index: nil, threads: nil,
|
|
31
|
-
build_index: false)
|
|
51
|
+
build_index: false, subset: nil)
|
|
32
52
|
if block_given?
|
|
33
53
|
message = "HTS::Bcf.new() does not take block; Please use HTS::Bcf.open() instead"
|
|
34
54
|
raise message
|
|
@@ -42,43 +62,43 @@ module HTS
|
|
|
42
62
|
@nthreads = threads
|
|
43
63
|
@hts_file = LibHTS.hts_open(@file_name, mode)
|
|
44
64
|
|
|
45
|
-
raise
|
|
65
|
+
raise OpenError, "Failed to open #{@file_name}" if @hts_file.null?
|
|
46
66
|
|
|
47
67
|
set_threads(threads) if threads
|
|
48
68
|
|
|
69
|
+
raise SubsetError, "Sample subsetting is only available when reading BCF/VCF files" if subset && @mode[0] == "w"
|
|
70
|
+
|
|
49
71
|
return if @mode[0] == "w"
|
|
50
72
|
|
|
51
|
-
@
|
|
73
|
+
@read_header = Bcf::Header.new(@hts_file)
|
|
74
|
+
@header = subset ? @read_header.subset(subset) : @read_header
|
|
52
75
|
build_index(index) if build_index
|
|
53
76
|
@idx = load_index(index)
|
|
54
77
|
@start_position = tell
|
|
55
78
|
end
|
|
56
79
|
|
|
57
|
-
def build_index(index_name = nil, min_shift: 14,
|
|
80
|
+
def build_index(index_name = nil, min_shift: 14, verbose: true)
|
|
58
81
|
check_closed
|
|
59
82
|
|
|
60
|
-
|
|
61
|
-
warn "Create index for #{@file_name} to #{index_name}"
|
|
62
|
-
else
|
|
63
|
-
warn "Create index for #{@file_name}"
|
|
64
|
-
end
|
|
65
|
-
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads || threads)
|
|
66
|
-
when 0 # successful
|
|
67
|
-
when -1 then raise "indexing failed"
|
|
68
|
-
when -2 then raise "opening #{@file_name} failed"
|
|
69
|
-
when -3 then raise "format not indexable"
|
|
70
|
-
when -4 then raise "failed to create and/or save the index"
|
|
71
|
-
else raise "unknown error"
|
|
72
|
-
end
|
|
83
|
+
self.class.build_index(@file_name, index_name, min_shift, @nthreads || 0, verbose)
|
|
73
84
|
self # for method chaining
|
|
74
85
|
end
|
|
75
86
|
|
|
76
87
|
def load_index(index_name = nil)
|
|
77
88
|
check_closed
|
|
78
89
|
|
|
79
|
-
if
|
|
90
|
+
if file_format == "vcf"
|
|
91
|
+
@index_format = :tabix
|
|
92
|
+
if index_name
|
|
93
|
+
LibHTS.tbx_index_load2(@file_name, index_name)
|
|
94
|
+
else
|
|
95
|
+
LibHTS.tbx_index_load3(@file_name, nil, 2)
|
|
96
|
+
end
|
|
97
|
+
elsif index_name
|
|
98
|
+
@index_format = :bcf
|
|
80
99
|
LibHTS.bcf_index_load2(@file_name, index_name)
|
|
81
100
|
else
|
|
101
|
+
@index_format = :bcf
|
|
82
102
|
LibHTS.bcf_index_load3(@file_name, nil, 2)
|
|
83
103
|
end
|
|
84
104
|
end
|
|
@@ -90,7 +110,14 @@ module HTS
|
|
|
90
110
|
end
|
|
91
111
|
|
|
92
112
|
def close
|
|
93
|
-
|
|
113
|
+
if @idx && !@idx.null?
|
|
114
|
+
case @index_format
|
|
115
|
+
when :bcf
|
|
116
|
+
LibHTS.hts_idx_destroy(@idx)
|
|
117
|
+
when :tabix
|
|
118
|
+
@idx.close
|
|
119
|
+
end
|
|
120
|
+
end
|
|
94
121
|
@idx = nil
|
|
95
122
|
super
|
|
96
123
|
end
|
|
@@ -98,7 +125,7 @@ module HTS
|
|
|
98
125
|
def write_header(header)
|
|
99
126
|
check_closed
|
|
100
127
|
|
|
101
|
-
@header = header.
|
|
128
|
+
@header = header.dup
|
|
102
129
|
LibHTS.bcf_hdr_write(@hts_file, header)
|
|
103
130
|
end
|
|
104
131
|
|
|
@@ -212,8 +239,7 @@ module HTS
|
|
|
212
239
|
def query(region, beg = nil, end_ = nil, copy: false, &block)
|
|
213
240
|
check_closed
|
|
214
241
|
|
|
215
|
-
raise "
|
|
216
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
|
242
|
+
raise MissingIndexError, "Index file is required to call the query method for #{@file_name}" unless index_loaded?
|
|
217
243
|
|
|
218
244
|
case region
|
|
219
245
|
when Array
|
|
@@ -261,8 +287,10 @@ module HTS
|
|
|
261
287
|
def queryi_reuse(tid, beg, end_, &block)
|
|
262
288
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
263
289
|
|
|
290
|
+
return queryi_reuse_vcf(tid, beg, end_, &block) if tabix_index?
|
|
291
|
+
|
|
264
292
|
qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
|
|
265
|
-
raise "Failed to query region #{tid}
|
|
293
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
266
294
|
|
|
267
295
|
query_reuse_yield(qiter, &block)
|
|
268
296
|
self
|
|
@@ -271,8 +299,10 @@ module HTS
|
|
|
271
299
|
def querys_reuse(region, &block)
|
|
272
300
|
return to_enum(__method__, region) unless block_given?
|
|
273
301
|
|
|
274
|
-
|
|
275
|
-
|
|
302
|
+
return querys_reuse_vcf(region, &block) if tabix_index?
|
|
303
|
+
|
|
304
|
+
qiter = LibHTS.bcf_itr_querys(@idx, read_header, region)
|
|
305
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
276
306
|
|
|
277
307
|
query_reuse_yield(qiter, &block)
|
|
278
308
|
self
|
|
@@ -296,6 +326,7 @@ module HTS
|
|
|
296
326
|
break if slen == -1
|
|
297
327
|
raise if slen < -1
|
|
298
328
|
|
|
329
|
+
apply_subset!(record)
|
|
299
330
|
yield record
|
|
300
331
|
end
|
|
301
332
|
ensure
|
|
@@ -306,8 +337,10 @@ module HTS
|
|
|
306
337
|
def queryi_copy(tid, beg, end_, &block)
|
|
307
338
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
308
339
|
|
|
340
|
+
return queryi_copy_vcf(tid, beg, end_, &block) if tabix_index?
|
|
341
|
+
|
|
309
342
|
qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
|
|
310
|
-
raise "Failed to query region #{tid}
|
|
343
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
311
344
|
|
|
312
345
|
query_copy_yield(qiter, &block)
|
|
313
346
|
self
|
|
@@ -316,8 +349,10 @@ module HTS
|
|
|
316
349
|
def querys_copy(region, &block)
|
|
317
350
|
return to_enum(__method__, region) unless block_given?
|
|
318
351
|
|
|
319
|
-
|
|
320
|
-
|
|
352
|
+
return querys_copy_vcf(region, &block) if tabix_index?
|
|
353
|
+
|
|
354
|
+
qiter = LibHTS.bcf_itr_querys(@idx, read_header, region)
|
|
355
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
321
356
|
|
|
322
357
|
query_copy_yield(qiter, &block)
|
|
323
358
|
self
|
|
@@ -333,18 +368,94 @@ module HTS
|
|
|
333
368
|
end
|
|
334
369
|
|
|
335
370
|
def query_copy_yield(qiter)
|
|
371
|
+
bcf1 = LibHTS.bcf_init
|
|
372
|
+
record = Record.new(header, bcf1)
|
|
336
373
|
loop do
|
|
337
|
-
bcf1 = LibHTS.bcf_init
|
|
338
374
|
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
|
339
375
|
break if slen == -1
|
|
340
376
|
raise if slen < -1
|
|
341
377
|
|
|
342
|
-
|
|
378
|
+
apply_subset!(record)
|
|
379
|
+
yield record.dup
|
|
343
380
|
end
|
|
344
381
|
ensure
|
|
345
382
|
LibHTS.bcf_itr_destroy(qiter)
|
|
346
383
|
end
|
|
347
384
|
|
|
385
|
+
def tabix_index?
|
|
386
|
+
@index_format == :tabix
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
def queryi_reuse_vcf(tid, beg, end_, &block)
|
|
390
|
+
qiter = LibHTS.tbx_itr_queryi(@idx, tid, beg, end_)
|
|
391
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
392
|
+
|
|
393
|
+
query_reuse_yield_vcf(qiter, &block)
|
|
394
|
+
self
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def querys_reuse_vcf(region, &block)
|
|
398
|
+
qiter = LibHTS.tbx_itr_querys(@idx, region)
|
|
399
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
400
|
+
|
|
401
|
+
query_reuse_yield_vcf(qiter, &block)
|
|
402
|
+
self
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def query_reuse_yield_vcf(qiter)
|
|
406
|
+
line = LibHTS::KString.new
|
|
407
|
+
bcf1 = LibHTS.bcf_init
|
|
408
|
+
record = Record.new(header, bcf1)
|
|
409
|
+
begin
|
|
410
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, line)) >= 0
|
|
411
|
+
raise QueryError, "Failed to parse VCF record in #{@file_name}" if LibHTS.vcf_parse(line, read_header,
|
|
412
|
+
bcf1) < 0
|
|
413
|
+
|
|
414
|
+
apply_subset!(record)
|
|
415
|
+
yield record
|
|
416
|
+
end
|
|
417
|
+
raise if slen < -1
|
|
418
|
+
ensure
|
|
419
|
+
line.free_buffer
|
|
420
|
+
LibHTS.hts_itr_destroy(qiter)
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
def queryi_copy_vcf(tid, beg, end_, &block)
|
|
425
|
+
qiter = LibHTS.tbx_itr_queryi(@idx, tid, beg, end_)
|
|
426
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
427
|
+
|
|
428
|
+
query_copy_yield_vcf(qiter, &block)
|
|
429
|
+
self
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
def querys_copy_vcf(region, &block)
|
|
433
|
+
qiter = LibHTS.tbx_itr_querys(@idx, region)
|
|
434
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
435
|
+
|
|
436
|
+
query_copy_yield_vcf(qiter, &block)
|
|
437
|
+
self
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
def query_copy_yield_vcf(qiter)
|
|
441
|
+
line = LibHTS::KString.new
|
|
442
|
+
begin
|
|
443
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, line)) >= 0
|
|
444
|
+
bcf1 = LibHTS.bcf_init
|
|
445
|
+
raise QueryError, "Failed to parse VCF record in #{@file_name}" if LibHTS.vcf_parse(line, read_header,
|
|
446
|
+
bcf1) < 0
|
|
447
|
+
|
|
448
|
+
record = Record.new(header, bcf1)
|
|
449
|
+
apply_subset!(record)
|
|
450
|
+
yield record
|
|
451
|
+
end
|
|
452
|
+
raise if slen < -1
|
|
453
|
+
ensure
|
|
454
|
+
line.free_buffer
|
|
455
|
+
LibHTS.hts_itr_destroy(qiter)
|
|
456
|
+
end
|
|
457
|
+
end
|
|
458
|
+
|
|
348
459
|
def each_record_reuse
|
|
349
460
|
check_closed
|
|
350
461
|
|
|
@@ -352,7 +463,10 @@ module HTS
|
|
|
352
463
|
|
|
353
464
|
bcf1 = LibHTS.bcf_init
|
|
354
465
|
record = Record.new(header, bcf1)
|
|
355
|
-
|
|
466
|
+
while LibHTS.bcf_read(@hts_file, read_header, bcf1) != -1
|
|
467
|
+
apply_subset!(record)
|
|
468
|
+
yield record
|
|
469
|
+
end
|
|
356
470
|
self
|
|
357
471
|
end
|
|
358
472
|
|
|
@@ -361,11 +475,26 @@ module HTS
|
|
|
361
475
|
|
|
362
476
|
return to_enum(__method__) unless block_given?
|
|
363
477
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
478
|
+
bcf1 = LibHTS.bcf_init
|
|
479
|
+
record = Record.new(header, bcf1)
|
|
480
|
+
while LibHTS.bcf_read(@hts_file, read_header, bcf1) != -1
|
|
481
|
+
apply_subset!(record)
|
|
482
|
+
yield record.dup
|
|
367
483
|
end
|
|
368
484
|
self
|
|
369
485
|
end
|
|
486
|
+
|
|
487
|
+
def read_header
|
|
488
|
+
@read_header || header
|
|
489
|
+
end
|
|
490
|
+
|
|
491
|
+
def apply_subset!(record)
|
|
492
|
+
return unless header.subset?
|
|
493
|
+
|
|
494
|
+
rc = LibHTS.bcf_subset(header.struct, record.struct, header.subset_sample_count, header.subset_imap_pointer || ::FFI::Pointer::NULL)
|
|
495
|
+
return if rc >= 0
|
|
496
|
+
|
|
497
|
+
raise SubsetError, "Failed to subset samples #{header.subset_samples.inspect} while reading #{@file_name}"
|
|
498
|
+
end
|
|
370
499
|
end
|
|
371
500
|
end
|
data/lib/hts/faidx.rb
CHANGED
|
@@ -1,14 +1,21 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "../htslib"
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
module HTS
|
|
6
|
+
module LibC
|
|
7
|
+
extend FFI::Library
|
|
8
|
+
ffi_lib FFI::Library::LIBC
|
|
9
|
+
attach_function :free, [:pointer], :void
|
|
10
|
+
end
|
|
11
|
+
end
|
|
5
12
|
|
|
6
13
|
module HTS
|
|
7
14
|
class Faidx
|
|
8
|
-
attr_reader :file_name
|
|
15
|
+
attr_reader :file_name, :format
|
|
9
16
|
|
|
10
|
-
def self.open(
|
|
11
|
-
file = new(
|
|
17
|
+
def self.open(file_name, format: :auto, auto_build: true)
|
|
18
|
+
file = new(file_name, format:, auto_build:) # do not yield
|
|
12
19
|
return file unless block_given?
|
|
13
20
|
|
|
14
21
|
begin
|
|
@@ -19,19 +26,19 @@ module HTS
|
|
|
19
26
|
file
|
|
20
27
|
end
|
|
21
28
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
29
|
+
def self.build_index(file_name, fai_path = nil, gzi_path = nil)
|
|
30
|
+
case LibHTS.fai_build3(file_name, fai_path, gzi_path)
|
|
31
|
+
when 0
|
|
32
|
+
else raise HTS::Error, "Failed to build faidx index for #{file_name}"
|
|
26
33
|
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def initialize(file_name, format: :auto, auto_build: true)
|
|
37
|
+
raise ArgumentError, "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead" if block_given?
|
|
27
38
|
|
|
28
|
-
@file_name = file_name
|
|
29
|
-
@
|
|
30
|
-
|
|
31
|
-
LibHTS.fai_load_format(@file_name, 2)
|
|
32
|
-
else
|
|
33
|
-
LibHTS.fai_load(@file_name)
|
|
34
|
-
end
|
|
39
|
+
@file_name = file_name.freeze
|
|
40
|
+
@format = resolve_format(@file_name, format)
|
|
41
|
+
@fai = load_handle(@file_name, @format, auto_build)
|
|
35
42
|
|
|
36
43
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
|
37
44
|
end
|
|
@@ -51,106 +58,136 @@ module HTS
|
|
|
51
58
|
@fai.nil? || @fai.null?
|
|
52
59
|
end
|
|
53
60
|
|
|
54
|
-
def
|
|
55
|
-
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# the number of sequences in the index.
|
|
59
|
-
def length
|
|
61
|
+
def size
|
|
62
|
+
check_closed
|
|
60
63
|
LibHTS.faidx_nseq(@fai)
|
|
61
64
|
end
|
|
62
|
-
alias size length
|
|
63
65
|
|
|
64
|
-
|
|
66
|
+
alias length size
|
|
67
|
+
|
|
65
68
|
def names
|
|
69
|
+
check_closed
|
|
66
70
|
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
|
67
71
|
end
|
|
68
72
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def has_key?(key)
|
|
73
|
+
def has_seq?(key)
|
|
74
|
+
check_closed
|
|
72
75
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
|
|
73
76
|
|
|
74
77
|
key = key.to_s
|
|
75
78
|
case LibHTS.faidx_has_seq(@fai, key)
|
|
76
79
|
when 1 then true
|
|
77
80
|
when 0 then false
|
|
78
|
-
else raise
|
|
81
|
+
else raise HTS::Error, "Unexpected return value from faidx_has_seq"
|
|
79
82
|
end
|
|
80
83
|
end
|
|
81
84
|
|
|
82
|
-
def [](name)
|
|
83
|
-
name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
|
|
84
|
-
Sequence.new(self, name)
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
# return the length of the requested chromosome.
|
|
88
85
|
def seq_len(chrom)
|
|
86
|
+
check_closed
|
|
89
87
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
|
90
88
|
|
|
91
89
|
chrom = chrom.to_s
|
|
92
|
-
result = LibHTS.
|
|
93
|
-
|
|
94
|
-
end
|
|
90
|
+
result = LibHTS.faidx_seq_len64(@fai, chrom)
|
|
91
|
+
raise ArgumentError, "Sequence not found: #{chrom}" if result == -1
|
|
95
92
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
# @param name [String] chr1:0-10
|
|
99
|
-
# @overload seq(name, start, stop)
|
|
100
|
-
# Fetch the sequence as a String.
|
|
101
|
-
# @param name [String] the name of the chromosome
|
|
102
|
-
# @param start [Integer] the start position of the sequence (0-based)
|
|
103
|
-
# @param stop [Integer] the end position of the sequence (0-based)
|
|
104
|
-
# @return [String] the sequence
|
|
93
|
+
result
|
|
94
|
+
end
|
|
105
95
|
|
|
106
96
|
def fetch_seq(name, start = nil, stop = nil)
|
|
97
|
+
check_closed
|
|
107
98
|
name = name.to_s
|
|
108
|
-
rlen = FFI::MemoryPointer.new(:int)
|
|
109
99
|
|
|
110
100
|
if start.nil? && stop.nil?
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
|
114
|
-
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
|
115
|
-
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
|
116
|
-
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
|
101
|
+
len = seq_len(name)
|
|
102
|
+
return "" if len.zero?
|
|
117
103
|
|
|
104
|
+
fetch_seq(name, 0, len - 1)
|
|
105
|
+
else
|
|
106
|
+
validate_range!(name, start, stop)
|
|
107
|
+
rlen = FFI::MemoryPointer.new(:int64)
|
|
118
108
|
result = LibHTS.faidx_fetch_seq64(@fai, name, start, stop, rlen)
|
|
109
|
+
fetch_result(result, rlen.read_int64, "sequence", name, start, stop)
|
|
119
110
|
end
|
|
120
|
-
|
|
121
|
-
case rlen.read_int
|
|
122
|
-
when -2 then raise "Invalid chromosome name: #{name}"
|
|
123
|
-
when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
result
|
|
127
111
|
end
|
|
128
112
|
|
|
129
|
-
alias seq fetch_seq
|
|
130
|
-
|
|
131
113
|
def fetch_qual(name, start = nil, stop = nil)
|
|
114
|
+
check_closed
|
|
115
|
+
raise HTS::Error, "Quality is only available for FASTQ indexes" unless format == :fastq
|
|
116
|
+
|
|
132
117
|
name = name.to_s
|
|
133
|
-
rlen = FFI::MemoryPointer.new(:int)
|
|
134
118
|
|
|
135
119
|
if start.nil? && stop.nil?
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
|
139
|
-
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
|
140
|
-
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
|
141
|
-
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
|
120
|
+
len = seq_len(name)
|
|
121
|
+
return "" if len.zero?
|
|
142
122
|
|
|
123
|
+
fetch_qual(name, 0, len - 1)
|
|
124
|
+
else
|
|
125
|
+
validate_range!(name, start, stop)
|
|
126
|
+
rlen = FFI::MemoryPointer.new(:int64)
|
|
143
127
|
result = LibHTS.faidx_fetch_qual64(@fai, name, start, stop, rlen)
|
|
128
|
+
fetch_result(result, rlen.read_int64, "quality", name, start, stop)
|
|
144
129
|
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def build_index(fai_path = nil, gzi_path = nil)
|
|
133
|
+
self.class.build_index(@file_name, fai_path, gzi_path)
|
|
134
|
+
self
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
private
|
|
145
138
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
139
|
+
def check_closed
|
|
140
|
+
raise IOError, "closed Faidx" if closed?
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def validate_range!(name, start, stop)
|
|
144
|
+
raise ArgumentError, "Expect start to be >= 0" if start < 0
|
|
145
|
+
raise ArgumentError, "Expect stop to be >= 0" if stop < 0
|
|
146
|
+
raise ArgumentError, "Expect start to be <= stop" if start > stop
|
|
147
|
+
|
|
148
|
+
len = seq_len(name)
|
|
149
|
+
raise ArgumentError, "Expect stop to be < seq_len (#{len})" if stop >= len
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def fetch_result(ptr, len, kind, name, start, stop)
|
|
153
|
+
case len
|
|
154
|
+
when -2 then raise ArgumentError, "Sequence not found: #{name}"
|
|
155
|
+
when -1 then raise HTS::Error, "Error fetching #{kind}: #{name}:#{start}-#{stop}"
|
|
149
156
|
end
|
|
150
157
|
|
|
151
|
-
|
|
158
|
+
raise HTS::Error, "Error fetching #{kind}: #{name}:#{start}-#{stop}" if ptr.null?
|
|
159
|
+
|
|
160
|
+
begin
|
|
161
|
+
ptr.read_string_length(len)
|
|
162
|
+
ensure
|
|
163
|
+
HTS::LibC.free(ptr)
|
|
164
|
+
end
|
|
152
165
|
end
|
|
153
166
|
|
|
154
|
-
|
|
167
|
+
def load_handle(file_name, format, auto_build)
|
|
168
|
+
case [format, auto_build]
|
|
169
|
+
when [:fasta, true]
|
|
170
|
+
LibHTS.fai_load_format(file_name, :FAI_FASTA)
|
|
171
|
+
when [:fastq, true]
|
|
172
|
+
LibHTS.fai_load_format(file_name, :FAI_FASTQ)
|
|
173
|
+
when [:fasta, false]
|
|
174
|
+
LibHTS.fai_load3_format(file_name, nil, nil, 0, :FAI_FASTA)
|
|
175
|
+
when [:fastq, false]
|
|
176
|
+
LibHTS.fai_load3_format(file_name, nil, nil, 0, :FAI_FASTQ)
|
|
177
|
+
else
|
|
178
|
+
raise ArgumentError, "Unsupported format: #{format}"
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def resolve_format(file_name, format)
|
|
183
|
+
case format
|
|
184
|
+
when :auto
|
|
185
|
+
file_name.match?(/\.(fastq|fq)(\.gz|\.bgz)?\z/i) ? :fastq : :fasta
|
|
186
|
+
when :fasta, :fastq
|
|
187
|
+
format
|
|
188
|
+
else
|
|
189
|
+
raise ArgumentError, "Unsupported format: #{format}"
|
|
190
|
+
end
|
|
191
|
+
end
|
|
155
192
|
end
|
|
156
193
|
end
|
data/lib/hts/hts.rb
CHANGED
|
@@ -71,7 +71,10 @@ module HTS
|
|
|
71
71
|
|
|
72
72
|
def fai=(fai)
|
|
73
73
|
check_closed
|
|
74
|
-
LibHTS.hts_set_fai_filename(@hts_file, fai)
|
|
74
|
+
r = LibHTS.hts_set_fai_filename(@hts_file, fai)
|
|
75
|
+
raise "Failed to load fasta index: #{fai}" if r.negative?
|
|
76
|
+
|
|
77
|
+
self
|
|
75
78
|
end
|
|
76
79
|
|
|
77
80
|
def set_threads(n = nil)
|
data/lib/hts/libhts/constants.rb
CHANGED
|
@@ -12,6 +12,27 @@ module HTS
|
|
|
12
12
|
:l, :size_t,
|
|
13
13
|
:m, :size_t,
|
|
14
14
|
:s, :string
|
|
15
|
+
|
|
16
|
+
def buffer_ptr
|
|
17
|
+
to_ptr.get_pointer(self.class.offset_of(:s))
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def read_string_copy
|
|
21
|
+
ptr = buffer_ptr
|
|
22
|
+
return "" if ptr.null?
|
|
23
|
+
|
|
24
|
+
ptr.read_string(self[:l])
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def free_buffer
|
|
28
|
+
ptr = buffer_ptr
|
|
29
|
+
return if ptr.null?
|
|
30
|
+
|
|
31
|
+
LibHTS.hts_free(ptr)
|
|
32
|
+
to_ptr.put_pointer(self.class.offset_of(:s), FFI::Pointer::NULL)
|
|
33
|
+
self[:l] = 0
|
|
34
|
+
self[:m] = 0
|
|
35
|
+
end
|
|
15
36
|
end
|
|
16
37
|
|
|
17
38
|
class KSeq < FFI::Struct
|
|
@@ -430,6 +451,17 @@ module HTS
|
|
|
430
451
|
:idx, HtsIdx.ptr,
|
|
431
452
|
:dict, :pointer
|
|
432
453
|
|
|
454
|
+
def close
|
|
455
|
+
return if @closed
|
|
456
|
+
|
|
457
|
+
ptr = to_ptr
|
|
458
|
+
unless ptr.null?
|
|
459
|
+
ptr.autorelease = false if ptr.respond_to?(:autorelease=)
|
|
460
|
+
self.class.release(ptr)
|
|
461
|
+
end
|
|
462
|
+
@closed = true
|
|
463
|
+
end
|
|
464
|
+
|
|
433
465
|
def self.release(ptr)
|
|
434
466
|
LibHTS.tbx_destroy(ptr) unless ptr.null?
|
|
435
467
|
end
|
|
@@ -439,7 +471,13 @@ module HTS
|
|
|
439
471
|
|
|
440
472
|
FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
|
|
441
473
|
|
|
442
|
-
|
|
474
|
+
# Faidx represents a faidx_t handle which is treated as a
|
|
475
|
+
# file-level RAII object in HTS::Faidx. It is intentionally
|
|
476
|
+
# kept as a plain Struct and is destroyed explicitly via
|
|
477
|
+
# LibHTS.fai_destroy in HTS::Faidx#close. Do not convert this
|
|
478
|
+
# to ManagedStruct; that would interfere with the explicit
|
|
479
|
+
# lifetime managed by the Ruby wrapper.
|
|
480
|
+
class Faidx < FFI::Struct
|
|
443
481
|
layout :bgzf, BGZF.ptr,
|
|
444
482
|
:n, :int,
|
|
445
483
|
:m, :int,
|
|
@@ -460,7 +498,7 @@ module HTS
|
|
|
460
498
|
:n, :int
|
|
461
499
|
end
|
|
462
500
|
|
|
463
|
-
# Complete textual representation of a header line
|
|
501
|
+
# Complete textual representation of a header line owned by Ruby.
|
|
464
502
|
class BcfHrec < FFI::ManagedStruct
|
|
465
503
|
layout \
|
|
466
504
|
:type, :int,
|
|
@@ -497,7 +535,7 @@ module HTS
|
|
|
497
535
|
class BcfIdinfo < FFI::Struct
|
|
498
536
|
layout \
|
|
499
537
|
:info, [:uint64, 3],
|
|
500
|
-
:hrec, [
|
|
538
|
+
:hrec, [:pointer, 3],
|
|
501
539
|
:id, :int
|
|
502
540
|
end
|
|
503
541
|
|
data/lib/hts/libhts/cram.rb
CHANGED
|
@@ -330,11 +330,6 @@ module HTS
|
|
|
330
330
|
%i[cram_fd cram_block cram_metrics int int],
|
|
331
331
|
:int
|
|
332
332
|
|
|
333
|
-
# attach_function \
|
|
334
|
-
# :cram_compress_block2,
|
|
335
|
-
# %i[cram_fd cram_slice cram_block cram_metrics int int],
|
|
336
|
-
# :int
|
|
337
|
-
|
|
338
333
|
# Creates a new container, specifying the maximum number of slices
|
|
339
334
|
# and records permitted.
|
|
340
335
|
attach_function \
|