htslib 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +23 -1
- data/lib/hts/bam/auxi.rb +228 -19
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +119 -36
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +85 -102
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +34 -2
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +10 -5
- data/lib/hts/version.rb +1 -1
- metadata +4 -4
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/bcf/record.rb
CHANGED
|
@@ -54,11 +54,11 @@ module HTS
|
|
|
54
54
|
end
|
|
55
55
|
|
|
56
56
|
def id=(id)
|
|
57
|
-
LibHTS.bcf_update_id(@header, @bcf1, id)
|
|
57
|
+
LibHTS.bcf_update_id(@header.struct, @bcf1, id)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
def clear_id
|
|
61
|
-
LibHTS.bcf_update_id(@header, @bcf1, ".")
|
|
61
|
+
LibHTS.bcf_update_id(@header.struct, @bcf1, ".")
|
|
62
62
|
end
|
|
63
63
|
|
|
64
64
|
def ref
|
|
@@ -100,7 +100,7 @@ module HTS
|
|
|
100
100
|
when 1
|
|
101
101
|
id = d[:flt].read_int
|
|
102
102
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, id)
|
|
103
|
-
when 2..
|
|
103
|
+
when 2..
|
|
104
104
|
d[:flt].get_array_of_int(0, n_flt).map do |i|
|
|
105
105
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
|
106
106
|
end
|
|
@@ -130,9 +130,13 @@ module HTS
|
|
|
130
130
|
|
|
131
131
|
def to_s
|
|
132
132
|
ksr = LibHTS::KString.new
|
|
133
|
-
|
|
133
|
+
begin
|
|
134
|
+
raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
|
|
134
135
|
|
|
135
|
-
|
|
136
|
+
ksr.read_string_copy
|
|
137
|
+
ensure
|
|
138
|
+
ksr.free_buffer
|
|
139
|
+
end
|
|
136
140
|
end
|
|
137
141
|
|
|
138
142
|
private
|
data/lib/hts/bcf.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "../htslib"
|
|
4
4
|
|
|
5
5
|
require_relative "hts"
|
|
6
|
+
require_relative "bcf/errors"
|
|
6
7
|
require_relative "bcf/header"
|
|
7
8
|
require_relative "bcf/info"
|
|
8
9
|
require_relative "bcf/format"
|
|
@@ -27,8 +28,27 @@ module HTS
|
|
|
27
28
|
file
|
|
28
29
|
end
|
|
29
30
|
|
|
31
|
+
def self.build_index(file_name, index_name = nil, min_shift = 14, threads = 0, verbose = true)
|
|
32
|
+
if verbose
|
|
33
|
+
if index_name
|
|
34
|
+
warn "Create index for #{file_name} to #{index_name}"
|
|
35
|
+
else
|
|
36
|
+
warn "Create index for #{file_name}"
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
case LibHTS.bcf_index_build3(file_name, index_name, min_shift, threads)
|
|
41
|
+
when 0 # successful
|
|
42
|
+
when -1 then raise IndexError, "Indexing failed for #{file_name}"
|
|
43
|
+
when -2 then raise IndexError, "Opening #{file_name} failed while building the index"
|
|
44
|
+
when -3 then raise IndexError, "#{file_name} is not in an indexable format"
|
|
45
|
+
when -4 then raise IndexError, "Failed to create or save the index for #{file_name}"
|
|
46
|
+
else raise IndexError, "Unknown index build error for #{file_name}"
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
30
50
|
def initialize(file_name, mode = "r", index: nil, threads: nil,
|
|
31
|
-
build_index: false)
|
|
51
|
+
build_index: false, subset: nil)
|
|
32
52
|
if block_given?
|
|
33
53
|
message = "HTS::Bcf.new() does not take block; Please use HTS::Bcf.open() instead"
|
|
34
54
|
raise message
|
|
@@ -42,43 +62,43 @@ module HTS
|
|
|
42
62
|
@nthreads = threads
|
|
43
63
|
@hts_file = LibHTS.hts_open(@file_name, mode)
|
|
44
64
|
|
|
45
|
-
raise
|
|
65
|
+
raise OpenError, "Failed to open #{@file_name}" if @hts_file.null?
|
|
46
66
|
|
|
47
67
|
set_threads(threads) if threads
|
|
48
68
|
|
|
69
|
+
raise SubsetError, "Sample subsetting is only available when reading BCF/VCF files" if subset && @mode[0] == "w"
|
|
70
|
+
|
|
49
71
|
return if @mode[0] == "w"
|
|
50
72
|
|
|
51
|
-
@
|
|
73
|
+
@read_header = Bcf::Header.new(@hts_file)
|
|
74
|
+
@header = subset ? @read_header.subset(subset) : @read_header
|
|
52
75
|
build_index(index) if build_index
|
|
53
76
|
@idx = load_index(index)
|
|
54
77
|
@start_position = tell
|
|
55
78
|
end
|
|
56
79
|
|
|
57
|
-
def build_index(index_name = nil, min_shift: 14,
|
|
80
|
+
def build_index(index_name = nil, min_shift: 14, verbose: true)
|
|
58
81
|
check_closed
|
|
59
82
|
|
|
60
|
-
|
|
61
|
-
warn "Create index for #{@file_name} to #{index_name}"
|
|
62
|
-
else
|
|
63
|
-
warn "Create index for #{@file_name}"
|
|
64
|
-
end
|
|
65
|
-
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads || threads)
|
|
66
|
-
when 0 # successful
|
|
67
|
-
when -1 then raise "indexing failed"
|
|
68
|
-
when -2 then raise "opening #{@file_name} failed"
|
|
69
|
-
when -3 then raise "format not indexable"
|
|
70
|
-
when -4 then raise "failed to create and/or save the index"
|
|
71
|
-
else raise "unknown error"
|
|
72
|
-
end
|
|
83
|
+
self.class.build_index(@file_name, index_name, min_shift, @nthreads || 0, verbose)
|
|
73
84
|
self # for method chaining
|
|
74
85
|
end
|
|
75
86
|
|
|
76
87
|
def load_index(index_name = nil)
|
|
77
88
|
check_closed
|
|
78
89
|
|
|
79
|
-
if
|
|
90
|
+
if file_format == "vcf"
|
|
91
|
+
@index_format = :tabix
|
|
92
|
+
if index_name
|
|
93
|
+
LibHTS.tbx_index_load2(@file_name, index_name)
|
|
94
|
+
else
|
|
95
|
+
LibHTS.tbx_index_load3(@file_name, nil, 2)
|
|
96
|
+
end
|
|
97
|
+
elsif index_name
|
|
98
|
+
@index_format = :bcf
|
|
80
99
|
LibHTS.bcf_index_load2(@file_name, index_name)
|
|
81
100
|
else
|
|
101
|
+
@index_format = :bcf
|
|
82
102
|
LibHTS.bcf_index_load3(@file_name, nil, 2)
|
|
83
103
|
end
|
|
84
104
|
end
|
|
@@ -90,7 +110,14 @@ module HTS
|
|
|
90
110
|
end
|
|
91
111
|
|
|
92
112
|
def close
|
|
93
|
-
|
|
113
|
+
if @idx && !@idx.null?
|
|
114
|
+
case @index_format
|
|
115
|
+
when :bcf
|
|
116
|
+
LibHTS.hts_idx_destroy(@idx)
|
|
117
|
+
when :tabix
|
|
118
|
+
@idx.close
|
|
119
|
+
end
|
|
120
|
+
end
|
|
94
121
|
@idx = nil
|
|
95
122
|
super
|
|
96
123
|
end
|
|
@@ -98,7 +125,7 @@ module HTS
|
|
|
98
125
|
def write_header(header)
|
|
99
126
|
check_closed
|
|
100
127
|
|
|
101
|
-
@header = header.
|
|
128
|
+
@header = header.dup
|
|
102
129
|
LibHTS.bcf_hdr_write(@hts_file, header)
|
|
103
130
|
end
|
|
104
131
|
|
|
@@ -212,8 +239,7 @@ module HTS
|
|
|
212
239
|
def query(region, beg = nil, end_ = nil, copy: false, &block)
|
|
213
240
|
check_closed
|
|
214
241
|
|
|
215
|
-
raise "
|
|
216
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
|
242
|
+
raise MissingIndexError, "Index file is required to call the query method for #{@file_name}" unless index_loaded?
|
|
217
243
|
|
|
218
244
|
case region
|
|
219
245
|
when Array
|
|
@@ -261,8 +287,10 @@ module HTS
|
|
|
261
287
|
def queryi_reuse(tid, beg, end_, &block)
|
|
262
288
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
263
289
|
|
|
290
|
+
return queryi_reuse_vcf(tid, beg, end_, &block) if tabix_index?
|
|
291
|
+
|
|
264
292
|
qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
|
|
265
|
-
raise "Failed to query region #{tid}
|
|
293
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
266
294
|
|
|
267
295
|
query_reuse_yield(qiter, &block)
|
|
268
296
|
self
|
|
@@ -271,8 +299,10 @@ module HTS
|
|
|
271
299
|
def querys_reuse(region, &block)
|
|
272
300
|
return to_enum(__method__, region) unless block_given?
|
|
273
301
|
|
|
274
|
-
|
|
275
|
-
|
|
302
|
+
return querys_reuse_vcf(region, &block) if tabix_index?
|
|
303
|
+
|
|
304
|
+
qiter = LibHTS.bcf_itr_querys(@idx, read_header, region)
|
|
305
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
276
306
|
|
|
277
307
|
query_reuse_yield(qiter, &block)
|
|
278
308
|
self
|
|
@@ -296,6 +326,7 @@ module HTS
|
|
|
296
326
|
break if slen == -1
|
|
297
327
|
raise if slen < -1
|
|
298
328
|
|
|
329
|
+
apply_subset!(record)
|
|
299
330
|
yield record
|
|
300
331
|
end
|
|
301
332
|
ensure
|
|
@@ -306,8 +337,10 @@ module HTS
|
|
|
306
337
|
def queryi_copy(tid, beg, end_, &block)
|
|
307
338
|
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
308
339
|
|
|
340
|
+
return queryi_copy_vcf(tid, beg, end_, &block) if tabix_index?
|
|
341
|
+
|
|
309
342
|
qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
|
|
310
|
-
raise "Failed to query region #{tid}
|
|
343
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
311
344
|
|
|
312
345
|
query_copy_yield(qiter, &block)
|
|
313
346
|
self
|
|
@@ -316,8 +349,10 @@ module HTS
|
|
|
316
349
|
def querys_copy(region, &block)
|
|
317
350
|
return to_enum(__method__, region) unless block_given?
|
|
318
351
|
|
|
319
|
-
|
|
320
|
-
|
|
352
|
+
return querys_copy_vcf(region, &block) if tabix_index?
|
|
353
|
+
|
|
354
|
+
qiter = LibHTS.bcf_itr_querys(@idx, read_header, region)
|
|
355
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
321
356
|
|
|
322
357
|
query_copy_yield(qiter, &block)
|
|
323
358
|
self
|
|
@@ -333,18 +368,94 @@ module HTS
|
|
|
333
368
|
end
|
|
334
369
|
|
|
335
370
|
def query_copy_yield(qiter)
|
|
371
|
+
bcf1 = LibHTS.bcf_init
|
|
372
|
+
record = Record.new(header, bcf1)
|
|
336
373
|
loop do
|
|
337
|
-
bcf1 = LibHTS.bcf_init
|
|
338
374
|
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
|
339
375
|
break if slen == -1
|
|
340
376
|
raise if slen < -1
|
|
341
377
|
|
|
342
|
-
|
|
378
|
+
apply_subset!(record)
|
|
379
|
+
yield record.dup
|
|
343
380
|
end
|
|
344
381
|
ensure
|
|
345
382
|
LibHTS.bcf_itr_destroy(qiter)
|
|
346
383
|
end
|
|
347
384
|
|
|
385
|
+
def tabix_index?
|
|
386
|
+
@index_format == :tabix
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
def queryi_reuse_vcf(tid, beg, end_, &block)
|
|
390
|
+
qiter = LibHTS.tbx_itr_queryi(@idx, tid, beg, end_)
|
|
391
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
392
|
+
|
|
393
|
+
query_reuse_yield_vcf(qiter, &block)
|
|
394
|
+
self
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def querys_reuse_vcf(region, &block)
|
|
398
|
+
qiter = LibHTS.tbx_itr_querys(@idx, region)
|
|
399
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
400
|
+
|
|
401
|
+
query_reuse_yield_vcf(qiter, &block)
|
|
402
|
+
self
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def query_reuse_yield_vcf(qiter)
|
|
406
|
+
line = LibHTS::KString.new
|
|
407
|
+
bcf1 = LibHTS.bcf_init
|
|
408
|
+
record = Record.new(header, bcf1)
|
|
409
|
+
begin
|
|
410
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, line)) >= 0
|
|
411
|
+
raise QueryError, "Failed to parse VCF record in #{@file_name}" if LibHTS.vcf_parse(line, read_header,
|
|
412
|
+
bcf1) < 0
|
|
413
|
+
|
|
414
|
+
apply_subset!(record)
|
|
415
|
+
yield record
|
|
416
|
+
end
|
|
417
|
+
raise if slen < -1
|
|
418
|
+
ensure
|
|
419
|
+
line.free_buffer
|
|
420
|
+
LibHTS.hts_itr_destroy(qiter)
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
def queryi_copy_vcf(tid, beg, end_, &block)
|
|
425
|
+
qiter = LibHTS.tbx_itr_queryi(@idx, tid, beg, end_)
|
|
426
|
+
raise QueryError, "Failed to query region #{tid}:#{beg}-#{end_} in #{@file_name}" if qiter.null?
|
|
427
|
+
|
|
428
|
+
query_copy_yield_vcf(qiter, &block)
|
|
429
|
+
self
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
def querys_copy_vcf(region, &block)
|
|
433
|
+
qiter = LibHTS.tbx_itr_querys(@idx, region)
|
|
434
|
+
raise QueryError, "Failed to query region #{region.inspect} in #{@file_name}" if qiter.null?
|
|
435
|
+
|
|
436
|
+
query_copy_yield_vcf(qiter, &block)
|
|
437
|
+
self
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
def query_copy_yield_vcf(qiter)
|
|
441
|
+
line = LibHTS::KString.new
|
|
442
|
+
begin
|
|
443
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, line)) >= 0
|
|
444
|
+
bcf1 = LibHTS.bcf_init
|
|
445
|
+
raise QueryError, "Failed to parse VCF record in #{@file_name}" if LibHTS.vcf_parse(line, read_header,
|
|
446
|
+
bcf1) < 0
|
|
447
|
+
|
|
448
|
+
record = Record.new(header, bcf1)
|
|
449
|
+
apply_subset!(record)
|
|
450
|
+
yield record
|
|
451
|
+
end
|
|
452
|
+
raise if slen < -1
|
|
453
|
+
ensure
|
|
454
|
+
line.free_buffer
|
|
455
|
+
LibHTS.hts_itr_destroy(qiter)
|
|
456
|
+
end
|
|
457
|
+
end
|
|
458
|
+
|
|
348
459
|
def each_record_reuse
|
|
349
460
|
check_closed
|
|
350
461
|
|
|
@@ -352,7 +463,10 @@ module HTS
|
|
|
352
463
|
|
|
353
464
|
bcf1 = LibHTS.bcf_init
|
|
354
465
|
record = Record.new(header, bcf1)
|
|
355
|
-
|
|
466
|
+
while LibHTS.bcf_read(@hts_file, read_header, bcf1) != -1
|
|
467
|
+
apply_subset!(record)
|
|
468
|
+
yield record
|
|
469
|
+
end
|
|
356
470
|
self
|
|
357
471
|
end
|
|
358
472
|
|
|
@@ -361,11 +475,26 @@ module HTS
|
|
|
361
475
|
|
|
362
476
|
return to_enum(__method__) unless block_given?
|
|
363
477
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
478
|
+
bcf1 = LibHTS.bcf_init
|
|
479
|
+
record = Record.new(header, bcf1)
|
|
480
|
+
while LibHTS.bcf_read(@hts_file, read_header, bcf1) != -1
|
|
481
|
+
apply_subset!(record)
|
|
482
|
+
yield record.dup
|
|
367
483
|
end
|
|
368
484
|
self
|
|
369
485
|
end
|
|
486
|
+
|
|
487
|
+
def read_header
|
|
488
|
+
@read_header || header
|
|
489
|
+
end
|
|
490
|
+
|
|
491
|
+
def apply_subset!(record)
|
|
492
|
+
return unless header.subset?
|
|
493
|
+
|
|
494
|
+
rc = LibHTS.bcf_subset(header.struct, record.struct, header.subset_sample_count, header.subset_imap_pointer || ::FFI::Pointer::NULL)
|
|
495
|
+
return if rc >= 0
|
|
496
|
+
|
|
497
|
+
raise SubsetError, "Failed to subset samples #{header.subset_samples.inspect} while reading #{@file_name}"
|
|
498
|
+
end
|
|
370
499
|
end
|
|
371
500
|
end
|
data/lib/hts/faidx.rb
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "../htslib"
|
|
4
|
-
require_relative "faidx/sequence"
|
|
5
4
|
|
|
6
5
|
module HTS
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
module LibC
|
|
7
|
+
extend FFI::Library
|
|
8
|
+
ffi_lib FFI::Library::LIBC
|
|
9
|
+
attach_function :free, [:pointer], :void
|
|
10
|
+
end
|
|
11
|
+
end
|
|
9
12
|
|
|
10
|
-
|
|
13
|
+
module HTS
|
|
14
|
+
class Faidx
|
|
15
|
+
attr_reader :file_name, :format
|
|
11
16
|
|
|
12
|
-
def self.open(
|
|
13
|
-
file = new(
|
|
17
|
+
def self.open(file_name, format: :auto, auto_build: true)
|
|
18
|
+
file = new(file_name, format:, auto_build:) # do not yield
|
|
14
19
|
return file unless block_given?
|
|
15
20
|
|
|
16
21
|
begin
|
|
@@ -21,16 +26,19 @@ module HTS
|
|
|
21
26
|
file
|
|
22
27
|
end
|
|
23
28
|
|
|
24
|
-
def
|
|
29
|
+
def self.build_index(file_name, fai_path = nil, gzi_path = nil)
|
|
30
|
+
case LibHTS.fai_build3(file_name, fai_path, gzi_path)
|
|
31
|
+
when 0
|
|
32
|
+
else raise HTS::Error, "Failed to build faidx index for #{file_name}"
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def initialize(file_name, format: :auto, auto_build: true)
|
|
25
37
|
raise ArgumentError, "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead" if block_given?
|
|
26
38
|
|
|
27
39
|
@file_name = file_name.freeze
|
|
28
|
-
@
|
|
29
|
-
|
|
30
|
-
LibHTS.fai_load_format(@file_name, 2)
|
|
31
|
-
else
|
|
32
|
-
LibHTS.fai_load(@file_name)
|
|
33
|
-
end
|
|
40
|
+
@format = resolve_format(@file_name, format)
|
|
41
|
+
@fai = load_handle(@file_name, @format, auto_build)
|
|
34
42
|
|
|
35
43
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
|
36
44
|
end
|
|
@@ -50,42 +58,19 @@ module HTS
|
|
|
50
58
|
@fai.nil? || @fai.null?
|
|
51
59
|
end
|
|
52
60
|
|
|
53
|
-
def
|
|
54
|
-
check_closed
|
|
55
|
-
@fai[:format]
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# Iterate over each sequence in the index.
|
|
59
|
-
# @yield [Sequence] each sequence object
|
|
60
|
-
# @return [Enumerator] if no block given
|
|
61
|
-
def each
|
|
62
|
-
return to_enum(__method__) unless block_given?
|
|
63
|
-
|
|
64
|
-
check_closed
|
|
65
|
-
names.each { |name| yield self[name] }
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# the number of sequences in the index.
|
|
69
|
-
# @return [Integer] the number of sequences
|
|
70
|
-
def length
|
|
61
|
+
def size
|
|
71
62
|
check_closed
|
|
72
63
|
LibHTS.faidx_nseq(@fai)
|
|
73
64
|
end
|
|
74
|
-
alias size length
|
|
75
65
|
|
|
76
|
-
|
|
77
|
-
|
|
66
|
+
alias length size
|
|
67
|
+
|
|
78
68
|
def names
|
|
79
69
|
check_closed
|
|
80
70
|
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
|
81
71
|
end
|
|
82
72
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# Check if a sequence exists in the index.
|
|
86
|
-
# @param key [String, Symbol] sequence name
|
|
87
|
-
# @return [Boolean] true if the sequence exists
|
|
88
|
-
def has_key?(key)
|
|
73
|
+
def has_seq?(key)
|
|
89
74
|
check_closed
|
|
90
75
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
|
|
91
76
|
|
|
@@ -97,94 +82,57 @@ module HTS
|
|
|
97
82
|
end
|
|
98
83
|
end
|
|
99
84
|
|
|
100
|
-
# Get a Sequence object by name or index.
|
|
101
|
-
# @param name [String, Symbol, Integer] sequence name or index
|
|
102
|
-
# @return [Sequence] the sequence object
|
|
103
|
-
# @raise [ArgumentError] if the sequence does not exist
|
|
104
|
-
def [](name)
|
|
105
|
-
check_closed
|
|
106
|
-
name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
|
|
107
|
-
Sequence.new(self, name)
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
# Return the length of the requested chromosome.
|
|
111
|
-
# @param chrom [String, Symbol] chromosome name
|
|
112
|
-
# @return [Integer] sequence length
|
|
113
|
-
# @raise [ArgumentError] if the sequence does not exist
|
|
114
85
|
def seq_len(chrom)
|
|
115
86
|
check_closed
|
|
116
87
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
|
117
88
|
|
|
118
89
|
chrom = chrom.to_s
|
|
119
|
-
result = LibHTS.
|
|
90
|
+
result = LibHTS.faidx_seq_len64(@fai, chrom)
|
|
120
91
|
raise ArgumentError, "Sequence not found: #{chrom}" if result == -1
|
|
121
92
|
|
|
122
93
|
result
|
|
123
94
|
end
|
|
124
95
|
|
|
125
|
-
# @overload fetch_seq(name)
|
|
126
|
-
# Fetch the sequence as a String.
|
|
127
|
-
# @param name [String, Symbol] chr1:0-10
|
|
128
|
-
# @return [String] the sequence
|
|
129
|
-
# @overload fetch_seq(name, start, stop)
|
|
130
|
-
# Fetch the sequence as a String.
|
|
131
|
-
# @param name [String, Symbol] the name of the chromosome
|
|
132
|
-
# @param start [Integer] the start position of the sequence (0-based)
|
|
133
|
-
# @param stop [Integer] the end position of the sequence (0-based)
|
|
134
|
-
# @return [String] the sequence
|
|
135
96
|
def fetch_seq(name, start = nil, stop = nil)
|
|
136
97
|
check_closed
|
|
137
98
|
name = name.to_s
|
|
138
|
-
rlen = FFI::MemoryPointer.new(:int)
|
|
139
99
|
|
|
140
100
|
if start.nil? && stop.nil?
|
|
141
|
-
|
|
101
|
+
len = seq_len(name)
|
|
102
|
+
return "" if len.zero?
|
|
103
|
+
|
|
104
|
+
fetch_seq(name, 0, len - 1)
|
|
142
105
|
else
|
|
143
106
|
validate_range!(name, start, stop)
|
|
107
|
+
rlen = FFI::MemoryPointer.new(:int64)
|
|
144
108
|
result = LibHTS.faidx_fetch_seq64(@fai, name, start, stop, rlen)
|
|
109
|
+
fetch_result(result, rlen.read_int64, "sequence", name, start, stop)
|
|
145
110
|
end
|
|
146
|
-
|
|
147
|
-
case rlen.read_int
|
|
148
|
-
when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
|
|
149
|
-
when -1 then raise HTS::Error, "Error fetching sequence: #{name}:#{start}-#{stop}"
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
result
|
|
153
111
|
end
|
|
154
112
|
|
|
155
|
-
alias seq fetch_seq
|
|
156
|
-
|
|
157
|
-
# @overload fetch_qual(name)
|
|
158
|
-
# Fetch the quality string.
|
|
159
|
-
# @param name [String, Symbol] sequence name
|
|
160
|
-
# @return [String] the quality string
|
|
161
|
-
# @overload fetch_qual(name, start, stop)
|
|
162
|
-
# Fetch the quality string.
|
|
163
|
-
# @param name [String, Symbol] the name of the chromosome
|
|
164
|
-
# @param start [Integer] the start position of the sequence (0-based)
|
|
165
|
-
# @param stop [Integer] the end position of the sequence (0-based)
|
|
166
|
-
# @return [String] the quality string
|
|
167
113
|
def fetch_qual(name, start = nil, stop = nil)
|
|
168
114
|
check_closed
|
|
115
|
+
raise HTS::Error, "Quality is only available for FASTQ indexes" unless format == :fastq
|
|
116
|
+
|
|
169
117
|
name = name.to_s
|
|
170
|
-
rlen = FFI::MemoryPointer.new(:int)
|
|
171
118
|
|
|
172
119
|
if start.nil? && stop.nil?
|
|
173
|
-
|
|
120
|
+
len = seq_len(name)
|
|
121
|
+
return "" if len.zero?
|
|
122
|
+
|
|
123
|
+
fetch_qual(name, 0, len - 1)
|
|
174
124
|
else
|
|
175
125
|
validate_range!(name, start, stop)
|
|
126
|
+
rlen = FFI::MemoryPointer.new(:int64)
|
|
176
127
|
result = LibHTS.faidx_fetch_qual64(@fai, name, start, stop, rlen)
|
|
128
|
+
fetch_result(result, rlen.read_int64, "quality", name, start, stop)
|
|
177
129
|
end
|
|
178
|
-
|
|
179
|
-
case rlen.read_int
|
|
180
|
-
when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
|
|
181
|
-
when -1 then raise HTS::Error, "Error fetching quality: #{name}:#{start}-#{stop}"
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
result
|
|
185
130
|
end
|
|
186
131
|
|
|
187
|
-
|
|
132
|
+
def build_index(fai_path = nil, gzi_path = nil)
|
|
133
|
+
self.class.build_index(@file_name, fai_path, gzi_path)
|
|
134
|
+
self
|
|
135
|
+
end
|
|
188
136
|
|
|
189
137
|
private
|
|
190
138
|
|
|
@@ -192,19 +140,54 @@ module HTS
|
|
|
192
140
|
raise IOError, "closed Faidx" if closed?
|
|
193
141
|
end
|
|
194
142
|
|
|
195
|
-
# Validate range parameters.
|
|
196
|
-
# @param name [String] sequence name
|
|
197
|
-
# @param start [Integer] start position (0-based)
|
|
198
|
-
# @param stop [Integer] stop position (0-based)
|
|
199
|
-
# @raise [ArgumentError] if range is invalid
|
|
200
143
|
def validate_range!(name, start, stop)
|
|
201
144
|
raise ArgumentError, "Expect start to be >= 0" if start < 0
|
|
202
145
|
raise ArgumentError, "Expect stop to be >= 0" if stop < 0
|
|
203
146
|
raise ArgumentError, "Expect start to be <= stop" if start > stop
|
|
204
147
|
|
|
205
148
|
len = seq_len(name)
|
|
206
|
-
raise ArgumentError, "Sequence not found: #{name}" if len.nil?
|
|
207
149
|
raise ArgumentError, "Expect stop to be < seq_len (#{len})" if stop >= len
|
|
208
150
|
end
|
|
151
|
+
|
|
152
|
+
def fetch_result(ptr, len, kind, name, start, stop)
|
|
153
|
+
case len
|
|
154
|
+
when -2 then raise ArgumentError, "Sequence not found: #{name}"
|
|
155
|
+
when -1 then raise HTS::Error, "Error fetching #{kind}: #{name}:#{start}-#{stop}"
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
raise HTS::Error, "Error fetching #{kind}: #{name}:#{start}-#{stop}" if ptr.null?
|
|
159
|
+
|
|
160
|
+
begin
|
|
161
|
+
ptr.read_string_length(len)
|
|
162
|
+
ensure
|
|
163
|
+
HTS::LibC.free(ptr)
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def load_handle(file_name, format, auto_build)
|
|
168
|
+
case [format, auto_build]
|
|
169
|
+
when [:fasta, true]
|
|
170
|
+
LibHTS.fai_load_format(file_name, :FAI_FASTA)
|
|
171
|
+
when [:fastq, true]
|
|
172
|
+
LibHTS.fai_load_format(file_name, :FAI_FASTQ)
|
|
173
|
+
when [:fasta, false]
|
|
174
|
+
LibHTS.fai_load3_format(file_name, nil, nil, 0, :FAI_FASTA)
|
|
175
|
+
when [:fastq, false]
|
|
176
|
+
LibHTS.fai_load3_format(file_name, nil, nil, 0, :FAI_FASTQ)
|
|
177
|
+
else
|
|
178
|
+
raise ArgumentError, "Unsupported format: #{format}"
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def resolve_format(file_name, format)
|
|
183
|
+
case format
|
|
184
|
+
when :auto
|
|
185
|
+
file_name.match?(/\.(fastq|fq)(\.gz|\.bgz)?\z/i) ? :fastq : :fasta
|
|
186
|
+
when :fasta, :fastq
|
|
187
|
+
format
|
|
188
|
+
else
|
|
189
|
+
raise ArgumentError, "Unsupported format: #{format}"
|
|
190
|
+
end
|
|
191
|
+
end
|
|
209
192
|
end
|
|
210
193
|
end
|
data/lib/hts/hts.rb
CHANGED
|
@@ -71,7 +71,10 @@ module HTS
|
|
|
71
71
|
|
|
72
72
|
def fai=(fai)
|
|
73
73
|
check_closed
|
|
74
|
-
LibHTS.hts_set_fai_filename(@hts_file, fai)
|
|
74
|
+
r = LibHTS.hts_set_fai_filename(@hts_file, fai)
|
|
75
|
+
raise "Failed to load fasta index: #{fai}" if r.negative?
|
|
76
|
+
|
|
77
|
+
self
|
|
75
78
|
end
|
|
76
79
|
|
|
77
80
|
def set_threads(n = nil)
|