htslib 0.0.4 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +64 -23
- data/lib/hts/bam/cigar.rb +2 -0
- data/lib/hts/bam/flag.rb +6 -2
- data/lib/hts/bam/header.rb +21 -6
- data/lib/hts/bam/record.rb +39 -8
- data/lib/hts/bam.rb +88 -53
- data/lib/hts/bcf/format.rb +27 -5
- data/lib/hts/bcf/header.rb +32 -4
- data/lib/hts/bcf/info.rb +82 -7
- data/lib/hts/bcf/record.rb +20 -19
- data/lib/hts/bcf.rb +58 -34
- data/lib/hts/faidx.rb +13 -4
- data/lib/hts/ffi_ext/struct.rb +1 -1
- data/lib/hts/libhts/constants.rb +12 -4
- data/lib/hts/libhts/hts.rb +7 -1
- data/lib/hts/libhts/sam.rb +2 -590
- data/lib/hts/libhts/sam_funcs.rb +595 -0
- data/lib/hts/libhts/thread_pool.rb +139 -0
- data/lib/hts/libhts/vcf.rb +58 -438
- data/lib/hts/libhts/vcf_funcs.rb +430 -0
- data/lib/hts/libhts.rb +1 -0
- data/lib/hts/tabix.rb +13 -5
- data/lib/hts/version.rb +1 -1
- metadata +6 -4
- data/lib/hts/utils/open_method.rb +0 -17
data/lib/hts/bcf/info.rb
CHANGED
@@ -5,12 +5,34 @@ module HTS
|
|
5
5
|
class Info
|
6
6
|
def initialize(record)
|
7
7
|
@record = record
|
8
|
+
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
8
9
|
end
|
9
10
|
|
11
|
+
# For compatibility with htslib.cr.
|
12
|
+
def get_int(key)
|
13
|
+
get(key, :int)
|
14
|
+
end
|
15
|
+
|
16
|
+
# For compatibility with htslib.cr.
|
17
|
+
def get_float(key)
|
18
|
+
get(key, :float)
|
19
|
+
end
|
20
|
+
|
21
|
+
# For compatibility with htslib.cr.
|
22
|
+
def get_string(key)
|
23
|
+
get(key, :string)
|
24
|
+
end
|
25
|
+
|
26
|
+
# For compatibility with htslib.cr.
|
27
|
+
def get_flag(key)
|
28
|
+
get(key, :flag)
|
29
|
+
end
|
30
|
+
|
31
|
+
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
10
32
|
def get(key, type = nil)
|
11
33
|
n = FFI::MemoryPointer.new(:int)
|
12
|
-
p1 = @
|
13
|
-
h = @record.
|
34
|
+
p1 = @p1
|
35
|
+
h = @record.header.struct
|
14
36
|
r = @record.struct
|
15
37
|
|
16
38
|
info_values = proc do |type|
|
@@ -20,19 +42,72 @@ module HTS
|
|
20
42
|
p1.read_pointer
|
21
43
|
end
|
22
44
|
|
23
|
-
|
45
|
+
type ||= info_type_to_string(get_info_type(key))
|
46
|
+
|
47
|
+
case type&.to_sym
|
24
48
|
when :int, :int32
|
25
49
|
info_values.call(LibHTS::BCF_HT_INT)
|
26
50
|
.read_array_of_int32(n.read_int)
|
27
51
|
when :float, :real
|
28
52
|
info_values.call(LibHTS::BCF_HT_REAL)
|
29
53
|
.read_array_of_float(n.read_int)
|
30
|
-
when :flag
|
31
|
-
|
32
|
-
|
54
|
+
when :flag, :bool
|
55
|
+
case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
|
56
|
+
when 1 then true
|
57
|
+
when 0 then false
|
58
|
+
when -1 then nil
|
59
|
+
else
|
60
|
+
raise "Unknown return value from bcf_get_info_flag: #{ret}"
|
61
|
+
end
|
33
62
|
when :string, :str
|
34
63
|
info_values.call(LibHTS::BCF_HT_STR)
|
35
|
-
.
|
64
|
+
.read_string
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# FIXME: naming? room for improvement.
|
69
|
+
# List every INFO field stored on the current record.
#
# Walks the record's INFO array one BcfInfo-sized slot at a time and
# resolves each slot's numeric key through the header.
#
# @return [Array<Hash>] one hash per INFO field with the keys
#   :name  - result of bcf_hdr_int2id for the field key,
#   :n     - result of bcf_hdr_id2number for the field key,
#   :vtype - the raw :type value of the BcfInfo entry,
#   :i     - the numeric field key itself.
def fields
  field_count = @record.struct[:n_info]
  field_count.times.map do |index|
    offset = index * LibHTS::BcfInfo.size
    entry = LibHTS::BcfInfo.new(@record.struct[:d][:info] + offset)
    key_id = entry[:key]
    field_name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key_id)
    number = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key_id)
    { name: field_name, n: number, vtype: entry[:type], i: key_id }
  end
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# Look up the raw BCF type code of the INFO field named +key+ on the
# current record.
#
# @param key [String] INFO tag name, compared against bcf_hdr_int2id output
# @return [Integer, nil] the :type value of the matching BcfInfo entry, or
#   nil when the record carries no field with that name
def get_info_type(key)
  hdr = @record.header.struct
  @record.struct[:n_info].times do |i|
    fld = LibHTS::BcfInfo.new(
      @record.struct[:d][:info] + i * LibHTS::BcfInfo.size
    )
    return fld[:type] if LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]) == key
  end
  # Integer#times returns its receiver, so without this explicit nil the
  # field count would leak out and be mistaken for a type code.
  nil
end
|
102
|
+
|
103
|
+
# Translate a raw BCF type code into the symbol used to pick a reader.
#
# @param t [Integer] raw type code
# @return [Symbol] :flag for 0, :int for 1-4, :float for 5, :string for 7
# @raise [RuntimeError] for any other code
def info_type_to_string(t)
  codes_by_name = { flag: [0], int: [1, 2, 3, 4], float: [5], string: [7] }
  name, = codes_by_name.find { |_, codes| codes.include?(t) }
  name || raise("Unknown info type: #{t}")
end
|
38
113
|
end
|
data/lib/hts/bcf/record.rb
CHANGED
@@ -3,20 +3,19 @@
|
|
3
3
|
module HTS
|
4
4
|
class Bcf
|
5
5
|
class Record
|
6
|
-
def initialize(bcf_t,
|
6
|
+
def initialize(bcf_t, header)
|
7
7
|
@bcf1 = bcf_t
|
8
|
-
@
|
9
|
-
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
8
|
+
@header = header
|
10
9
|
end
|
11
10
|
|
12
|
-
attr_reader :
|
11
|
+
attr_reader :header
|
13
12
|
|
14
13
|
def struct
|
15
14
|
@bcf1
|
16
15
|
end
|
17
16
|
|
18
17
|
def to_ptr
|
19
|
-
@
|
18
|
+
@bcf1.to_ptr
|
20
19
|
end
|
21
20
|
|
22
21
|
# def inspect; end
|
@@ -26,15 +25,9 @@ module HTS
|
|
26
25
|
def genotypes; end
|
27
26
|
|
28
27
|
def chrom
|
29
|
-
hdr = @bcf.header.struct
|
30
28
|
rid = @bcf1[:rid]
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
LibHTS::BcfIdpair.new(
|
35
|
-
hdr[:id][LibHTS::BCF_DT_CTG].to_ptr +
|
36
|
-
LibHTS::BcfIdpair.size * rid # offset
|
37
|
-
)[:key]
|
30
|
+
LibHTS.bcf_hdr_id2name(@header.struct, rid)
|
38
31
|
end
|
39
32
|
|
40
33
|
def pos
|
@@ -64,11 +57,13 @@ module HTS
|
|
64
57
|
"PASS"
|
65
58
|
when 1
|
66
59
|
i = d[:flt].read_int
|
67
|
-
LibHTS.bcf_hdr_int2id(@
|
60
|
+
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
68
61
|
when 2
|
69
62
|
d[:flt].get_array_of_int(0, n_flt).map do |i|
|
70
|
-
LibHTS.bcf_hdr_int2id(@
|
63
|
+
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
71
64
|
end
|
65
|
+
else
|
66
|
+
raise "Unexpected number of filters. n_flt: #{n_flt}"
|
72
67
|
end
|
73
68
|
end
|
74
69
|
|
@@ -85,13 +80,14 @@ module HTS
|
|
85
80
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
|
86
81
|
@bcf1[:d][:allele].get_array_of_pointer(
|
87
82
|
FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
|
88
|
-
).map
|
83
|
+
).map(&:read_string)
|
89
84
|
end
|
90
85
|
|
91
86
|
def alleles
|
87
|
+
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
|
92
88
|
@bcf1[:d][:allele].get_array_of_pointer(
|
93
89
|
0, @bcf1[:n_allele]
|
94
|
-
).map
|
90
|
+
).map(&:read_string)
|
95
91
|
end
|
96
92
|
|
97
93
|
def info
|
@@ -106,11 +102,16 @@ module HTS
|
|
106
102
|
|
107
103
|
def to_s
|
108
104
|
ksr = LibHTS::KString.new
|
109
|
-
if LibHTS.vcf_format(@
|
110
|
-
|
111
|
-
end
|
105
|
+
raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
|
106
|
+
|
112
107
|
ksr[:s]
|
113
108
|
end
|
109
|
+
|
110
|
+
private
|
111
|
+
|
112
|
+
# Reject dup/clone of a record.
#
# Ruby invokes this hook with the source object as its single argument, so
# the parameter must be declared; without it every dup/clone fails with an
# ArgumentError instead of the message below.
#
# @param _orig [Record] the record being copied (unused)
# @raise [RuntimeError] always
def initialize_copy(_orig)
  raise "Not implemented"
end
|
114
115
|
end
|
115
116
|
end
|
116
117
|
end
|
data/lib/hts/bcf.rb
CHANGED
@@ -4,72 +4,96 @@
|
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
6
|
require_relative "bcf/header"
|
7
|
-
require_relative "bcf/record"
|
8
7
|
require_relative "bcf/info"
|
9
8
|
require_relative "bcf/format"
|
10
|
-
require_relative "
|
9
|
+
require_relative "bcf/record"
|
11
10
|
|
12
11
|
module HTS
|
13
12
|
class Bcf
|
14
13
|
include Enumerable
|
15
|
-
extend Utils::OpenMethod
|
16
14
|
|
17
15
|
attr_reader :file_path, :mode, :header
|
18
|
-
# HtfFile is FFI::BitStruct
|
19
|
-
attr_reader :htf_file
|
20
16
|
|
21
|
-
|
22
|
-
|
17
|
+
# Open a file, optionally yielding it to a block and closing it afterwards.
#
# Arguments are forwarded explicitly rather than with `(...)`: full
# forwarding would also pass the caller's block on to `new`, and the
# constructor raises when it is given a block.
#
# @param args [Array] positional arguments for the constructor
# @param kwargs [Hash] keyword arguments for the constructor
# @yield [file] the opened file; it is closed when the block exits
# @return the file object (already closed when a block was given)
def self.open(*args, **kwargs)
  file = new(*args, **kwargs) # deliberately not forwarding the block
  return file unless block_given?

  begin
    yield file
  ensure
    file.close
  end
  file
end
|
24
28
|
|
25
|
-
def initialize(
|
26
|
-
|
29
|
+
def initialize(filename, mode = "r", threads: nil)
|
30
|
+
raise "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead" if block_given?
|
31
|
+
|
32
|
+
@file_path = filename == "-" ? "-" : File.expand_path(filename)
|
27
33
|
|
28
|
-
|
34
|
+
if mode[0] == "r" && !File.exist?(file_path)
|
29
35
|
message = "No such VCF/BCF file - #{file_path}"
|
30
36
|
raise message
|
31
37
|
end
|
32
38
|
|
33
|
-
@file_path = file_path
|
34
39
|
@mode = mode
|
35
|
-
@
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
# IO like API
|
42
|
-
if block_given?
|
43
|
-
begin
|
44
|
-
yield self
|
45
|
-
ensure
|
46
|
-
close
|
47
|
-
end
|
40
|
+
@hts_file = LibHTS.hts_open(file_path, mode)
|
41
|
+
|
42
|
+
if threads&.> 0
|
43
|
+
r = LibHTS.hts_set_threads(@hts_file, threads)
|
44
|
+
raise "Failed to set number of threads: #{threads}" if r < 0
|
48
45
|
end
|
46
|
+
|
47
|
+
return if mode[0] == "w"
|
48
|
+
|
49
|
+
@header = Bcf::Header.new(@hts_file)
|
49
50
|
end
|
50
51
|
|
51
52
|
def struct
|
52
|
-
|
53
|
+
@hts_file
|
53
54
|
end
|
54
55
|
|
55
56
|
def to_ptr
|
56
|
-
|
57
|
+
@hts_file.to_ptr
|
58
|
+
end
|
59
|
+
|
60
|
+
# Write the current header to the output file.
#
# hts_set_fai_filename is called with the htsFile handle: the C function
# takes the file handle as its first argument, not the header.
#
# NOTE(review): the constructor visible in this diff returns before
# assigning @header when the mode starts with "w", so +header+ may be nil
# here - confirm how callers are expected to supply the header.
#
# @return the return value of LibHTS.bcf_hdr_write
def write_header
  @header = header.dup
  LibHTS.hts_set_fai_filename(@hts_file, @file_path)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
|
65
|
+
|
66
|
+
# Write one record to the output file.
#
# The record is duplicated before it is handed to bcf_write.
# bcf_write reports success as 0 and failure as a negative value, so only
# a negative return is treated as an error.
#
# @param var [Record] the record to write
# @return [nil]
# @raise [RuntimeError] when bcf_write reports a failure
def write(var)
  var_dup = var.dup
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  raise "Failed to write record" if status.negative?

  nil
end
|
58
70
|
|
59
71
|
# Close the current file.
|
60
72
|
def close
|
61
|
-
LibHTS.hts_close(
|
73
|
+
LibHTS.hts_close(@hts_file)
|
74
|
+
@hts_file = nil
|
62
75
|
end
|
63
76
|
|
64
|
-
def
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
77
|
+
def closed?
|
78
|
+
@hts_file.nil?
|
79
|
+
end
|
80
|
+
|
81
|
+
def sample_count
|
82
|
+
header.sample_count
|
69
83
|
end
|
70
84
|
|
71
|
-
def
|
72
|
-
|
85
|
+
def sample_names
|
86
|
+
header.sample_names
|
87
|
+
end
|
88
|
+
|
89
|
+
# Iterate over the records of the file.
#
# A fresh bcf1 is allocated with bcf_init for every read. bcf_read returns
# -1 at end of file and a value below -1 on a read error; the loop ends on
# the former and raises on the latter instead of yielding an unfilled
# record.
#
# @yield [record] each Record read from the file
# @return [self, Enumerator] self when a block is given, otherwise an
#   enumerator over this method
# @raise [RuntimeError] when bcf_read reports an error
def each
  return to_enum(__method__) unless block_given?

  until (status = LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init)) == -1
    raise "Failed to read record. bcf_read returned #{status}" if status < -1

    yield Record.new(bcf1, header)
  end
  self
end
|
74
98
|
end
|
75
99
|
end
|
data/lib/hts/faidx.rb
CHANGED
@@ -3,17 +3,26 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
-
require_relative "utils/open_method"
|
7
|
-
|
8
6
|
module HTS
|
9
7
|
class Faidx
|
10
|
-
extend Utils::OpenMethod
|
11
|
-
|
12
8
|
attr_reader :file_path
|
13
9
|
|
10
|
+
class << self
|
11
|
+
alias open new
|
12
|
+
end
|
13
|
+
|
14
14
|
def initialize(file_path)
|
15
15
|
@file_path = File.expand_path(file_path)
|
16
16
|
@fai = LibHTS.fai_load(file_path)
|
17
|
+
|
18
|
+
# IO like API
|
19
|
+
if block_given?
|
20
|
+
begin
|
21
|
+
yield self
|
22
|
+
ensure
|
23
|
+
close
|
24
|
+
end
|
25
|
+
end
|
17
26
|
end
|
18
27
|
|
19
28
|
def close
|
data/lib/hts/ffi_ext/struct.rb
CHANGED
data/lib/hts/libhts/constants.rb
CHANGED
@@ -218,7 +218,7 @@ module HTS
|
|
218
218
|
|
219
219
|
SamFile = HtsFile
|
220
220
|
|
221
|
-
class
|
221
|
+
class HtsTpool < FFI::Struct
|
222
222
|
layout \
|
223
223
|
:pool, :pointer,
|
224
224
|
:qsize, :int
|
@@ -289,7 +289,7 @@ module HTS
|
|
289
289
|
:isize, :hts_pos_t
|
290
290
|
end
|
291
291
|
|
292
|
-
class Bam1 < FFI::
|
292
|
+
class Bam1 < FFI::ManagedStruct
|
293
293
|
layout \
|
294
294
|
:core, Bam1Core,
|
295
295
|
:id, :uint64,
|
@@ -301,6 +301,10 @@ module HTS
|
|
301
301
|
# bit_fields :_mempolicy,
|
302
302
|
# :mempolicy, 2,
|
303
303
|
# :_reserved, 30
|
304
|
+
|
305
|
+
def self.release(ptr)
|
306
|
+
LibHTS.bam_destroy1(ptr) unless ptr.null?
|
307
|
+
end
|
304
308
|
end
|
305
309
|
|
306
310
|
typedef :pointer, :bam_plp
|
@@ -457,7 +461,7 @@ module HTS
|
|
457
461
|
:id, :string,
|
458
462
|
:als, :pointer, # (\\0-separated string)
|
459
463
|
:allele, :pointer,
|
460
|
-
:info, BcfInfo.ptr,
|
464
|
+
:info, :pointer, # BcfInfo.ptr,
|
461
465
|
:fmt, BcfFmt.ptr,
|
462
466
|
:var, BcfVariant.ptr,
|
463
467
|
:n_var, :int,
|
@@ -466,7 +470,7 @@ module HTS
|
|
466
470
|
:indiv_dirty, :int
|
467
471
|
end
|
468
472
|
|
469
|
-
class Bcf1 < FFI::
|
473
|
+
class Bcf1 < FFI::ManagedBitStruct
|
470
474
|
layout \
|
471
475
|
:pos, :hts_pos_t,
|
472
476
|
:rlen, :hts_pos_t,
|
@@ -489,6 +493,10 @@ module HTS
|
|
489
493
|
bit_fields :_n_fmt_sample,
|
490
494
|
:n_fmt, 8,
|
491
495
|
:n_sample, 24
|
496
|
+
|
497
|
+
def self.release(ptr)
|
498
|
+
LibHTS.bcf_destroy(ptr) unless ptr.null?
|
499
|
+
end
|
492
500
|
end
|
493
501
|
end
|
494
502
|
end
|
data/lib/hts/libhts/hts.rb
CHANGED
@@ -89,6 +89,12 @@ module HTS
|
|
89
89
|
%i[HFILE string string],
|
90
90
|
HtsFile.by_ref
|
91
91
|
|
92
|
+
# For output streams, flush any buffered data
|
93
|
+
attach_function \
|
94
|
+
:hts_flush,
|
95
|
+
[HtsFile],
|
96
|
+
:int
|
97
|
+
|
92
98
|
# Close a file handle, flushing buffered data for output streams
|
93
99
|
attach_function \
|
94
100
|
:hts_close,
|
@@ -139,7 +145,7 @@ module HTS
|
|
139
145
|
# Create extra threads to aid compress/decompression for this file
|
140
146
|
attach_function \
|
141
147
|
:hts_set_thread_pool,
|
142
|
-
[HtsFile,
|
148
|
+
[HtsFile, HtsTpool],
|
143
149
|
:int
|
144
150
|
|
145
151
|
# Adds a cache of decompressed blocks, potentially speeding up seeks.
|