htslib 0.0.10 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -16
- data/lib/hts/bam/aux.rb +39 -0
- data/lib/hts/bam/cigar.rb +0 -3
- data/lib/hts/bam/flag.rb +21 -57
- data/lib/hts/bam/header.rb +1 -4
- data/lib/hts/bam/record.rb +91 -47
- data/lib/hts/bam.rb +14 -15
- data/lib/hts/bcf/format.rb +82 -11
- data/lib/hts/bcf/info.rb +56 -31
- data/lib/hts/bcf/record.rb +43 -16
- data/lib/hts/bcf.rb +40 -15
- data/lib/hts/faidx.rb +0 -3
- data/lib/hts/ffi_ext/pointer.rb +18 -0
- data/lib/hts/hts.rb +22 -4
- data/lib/hts/libhts/constants.rb +3 -3
- data/lib/hts/libhts/cram.rb +287 -292
- data/lib/hts/libhts/vcf.rb +14 -0
- data/lib/hts/libhts.rb +4 -0
- data/lib/hts/{tabix.rb → tbx.rb} +4 -11
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +1 -1
- metadata +5 -3
data/lib/hts/bcf/info.rb
CHANGED
@@ -28,6 +28,10 @@ module HTS
|
|
28
28
|
get(key, :flag)
|
29
29
|
end
|
30
30
|
|
31
|
+
def [](key)
|
32
|
+
get(key)
|
33
|
+
end
|
34
|
+
|
31
35
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
32
36
|
def get(key, type = nil)
|
33
37
|
n = FFI::MemoryPointer.new(:int)
|
@@ -35,14 +39,14 @@ module HTS
|
|
35
39
|
h = @record.header.struct
|
36
40
|
r = @record.struct
|
37
41
|
|
38
|
-
info_values = proc do |
|
39
|
-
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n,
|
42
|
+
info_values = proc do |typ|
|
43
|
+
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
|
40
44
|
return nil if ret < 0 # return from method.
|
41
45
|
|
42
46
|
p1.read_pointer
|
43
47
|
end
|
44
48
|
|
45
|
-
type ||=
|
49
|
+
type ||= ht_type_to_sym(get_info_type(key))
|
46
50
|
|
47
51
|
case type&.to_sym
|
48
52
|
when :int, :int32
|
@@ -67,47 +71,68 @@ module HTS
|
|
67
71
|
|
68
72
|
# FIXME: naming? room for improvement.
|
69
73
|
def fields
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
i * LibHTS::BcfInfo.size
|
75
|
-
)
|
74
|
+
keys.map do |key|
|
75
|
+
name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
|
76
|
+
num = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key)
|
77
|
+
type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key)
|
76
78
|
{
|
77
|
-
name:
|
78
|
-
|
79
|
-
),
|
80
|
-
|
81
|
-
@record.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
|
82
|
-
),
|
83
|
-
vtype: fld[:type], i: fld[:key]
|
79
|
+
name: name,
|
80
|
+
n: num,
|
81
|
+
type: ht_type_to_sym(type),
|
82
|
+
key: key
|
84
83
|
}
|
85
84
|
end
|
86
85
|
end
|
87
86
|
|
87
|
+
def length
|
88
|
+
@record.struct[:n_info]
|
89
|
+
end
|
90
|
+
|
91
|
+
def size
|
92
|
+
length
|
93
|
+
end
|
94
|
+
|
95
|
+
def to_h
|
96
|
+
ret = {}
|
97
|
+
keys.each do |key|
|
98
|
+
name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
|
99
|
+
ret[name] = get(name)
|
100
|
+
end
|
101
|
+
ret
|
102
|
+
end
|
103
|
+
|
88
104
|
private
|
89
105
|
|
106
|
+
def info_ptr
|
107
|
+
@record.struct[:d][:info].to_ptr
|
108
|
+
end
|
109
|
+
|
110
|
+
def keys
|
111
|
+
info_ptr.read_array_of_struct(LibHTS::BcfInfo, length).map do |info|
|
112
|
+
info[:key]
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
90
116
|
def get_info_type(key)
|
91
117
|
@record.struct[:n_info].times do |i|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
return fld[:type] if id == key
|
118
|
+
info = LibHTS::BcfInfo.new(@record.struct[:d][:info] + i * LibHTS::BcfInfo.size)
|
119
|
+
k = info[:key]
|
120
|
+
id = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, k)
|
121
|
+
if id == key
|
122
|
+
type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, k)
|
123
|
+
return type
|
124
|
+
end
|
100
125
|
end
|
126
|
+
nil
|
101
127
|
end
|
102
128
|
|
103
|
-
def
|
129
|
+
def ht_type_to_sym(t)
|
104
130
|
case t
|
105
|
-
when
|
106
|
-
when
|
107
|
-
when
|
108
|
-
when
|
109
|
-
|
110
|
-
raise "Unknown info type: #{t}"
|
131
|
+
when LibHTS::BCF_HT_FLAG then :flag
|
132
|
+
when LibHTS::BCF_HT_INT then :int
|
133
|
+
when LibHTS::BCF_HT_REAL then :float
|
134
|
+
when LibHTS::BCF_HT_STR then :string
|
135
|
+
when LibHTS::BCF_HT_LONG then :float
|
111
136
|
end
|
112
137
|
end
|
113
138
|
end
|
data/lib/hts/bcf/record.rb
CHANGED
@@ -18,35 +18,48 @@ module HTS
|
|
18
18
|
@bcf1.to_ptr
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
21
|
+
# Get the reference id of the record.
|
22
|
+
def rid
|
23
|
+
@bcf1[:rid]
|
24
|
+
end
|
24
25
|
|
25
|
-
def
|
26
|
+
def rid=(rid)
|
27
|
+
@bcf1[:rid] = rid
|
28
|
+
end
|
26
29
|
|
30
|
+
# Get the chromosome of variant.
|
27
31
|
def chrom
|
28
|
-
rid = @bcf1[:rid]
|
29
|
-
|
30
32
|
LibHTS.bcf_hdr_id2name(@header.struct, rid)
|
31
33
|
end
|
32
34
|
|
35
|
+
# Return 0-based position.
|
33
36
|
def pos
|
34
|
-
@bcf1[:pos]
|
37
|
+
@bcf1[:pos]
|
35
38
|
end
|
36
39
|
|
37
|
-
def
|
38
|
-
@bcf1[:pos]
|
40
|
+
def pos=(pos)
|
41
|
+
@bcf1[:pos] = pos
|
39
42
|
end
|
40
43
|
|
41
|
-
|
42
|
-
|
44
|
+
# Return the 0-based, exclusive end position
|
45
|
+
def endpos
|
46
|
+
pos + @bcf1[:rlen]
|
43
47
|
end
|
44
48
|
|
49
|
+
# Return the value of the ID column.
|
45
50
|
def id
|
46
51
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
|
47
52
|
@bcf1[:d][:id]
|
48
53
|
end
|
49
54
|
|
55
|
+
def id=(id)
|
56
|
+
LibHTS.bcf_update_id(@header, @bcf1, id)
|
57
|
+
end
|
58
|
+
|
59
|
+
def clear_id
|
60
|
+
LibHTS.bcf_update_id(@header, @bcf1, ".")
|
61
|
+
end
|
62
|
+
|
50
63
|
def filter
|
51
64
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
|
52
65
|
d = @bcf1[:d]
|
@@ -58,7 +71,7 @@ module HTS
|
|
58
71
|
when 1
|
59
72
|
i = d[:flt].read_int
|
60
73
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
61
|
-
when 2
|
74
|
+
when 2..nil
|
62
75
|
d[:flt].get_array_of_int(0, n_flt).map do |i|
|
63
76
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
64
77
|
end
|
@@ -67,10 +80,15 @@ module HTS
|
|
67
80
|
end
|
68
81
|
end
|
69
82
|
|
83
|
+
# Get variant quality.
|
70
84
|
def qual
|
71
85
|
@bcf1[:qual]
|
72
86
|
end
|
73
87
|
|
88
|
+
def qual=(qual)
|
89
|
+
@bcf1[:qual] = qual
|
90
|
+
end
|
91
|
+
|
74
92
|
def ref
|
75
93
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
|
76
94
|
@bcf1[:d][:allele].get_pointer(0).read_string
|
@@ -90,14 +108,23 @@ module HTS
|
|
90
108
|
).map(&:read_string)
|
91
109
|
end
|
92
110
|
|
93
|
-
def info
|
111
|
+
def info(key = nil)
|
94
112
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
|
95
|
-
Info.new(self)
|
113
|
+
info = Info.new(self)
|
114
|
+
if key
|
115
|
+
info.get(key)
|
116
|
+
else
|
117
|
+
info
|
118
|
+
end
|
96
119
|
end
|
97
120
|
|
98
|
-
def format
|
121
|
+
def format(key = nil)
|
99
122
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
|
100
|
-
|
123
|
+
if key
|
124
|
+
Format.new(self).get(key)
|
125
|
+
else
|
126
|
+
Format.new(self)
|
127
|
+
end
|
101
128
|
end
|
102
129
|
|
103
130
|
def to_s
|
data/lib/hts/bcf.rb
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Based on hts-python
|
4
|
-
# https://github.com/quinlan-lab/hts-python
|
5
|
-
|
6
3
|
require_relative "../htslib"
|
7
4
|
|
8
5
|
require_relative "hts"
|
@@ -15,7 +12,7 @@ module HTS
|
|
15
12
|
class Bcf < Hts
|
16
13
|
include Enumerable
|
17
14
|
|
18
|
-
attr_reader :file_name, :
|
15
|
+
attr_reader :file_name, :index_name, :mode, :header
|
19
16
|
|
20
17
|
def self.open(*args, **kw)
|
21
18
|
file = new(*args, **kw) # do not yield
|
@@ -38,9 +35,10 @@ module HTS
|
|
38
35
|
|
39
36
|
# NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
|
40
37
|
|
41
|
-
@file_name
|
42
|
-
@
|
43
|
-
@
|
38
|
+
@file_name = file_name
|
39
|
+
@index_name = index
|
40
|
+
@mode = mode
|
41
|
+
@hts_file = LibHTS.hts_open(@file_name, mode)
|
44
42
|
|
45
43
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
46
44
|
|
@@ -52,28 +50,51 @@ module HTS
|
|
52
50
|
return if @mode[0] == "w"
|
53
51
|
|
54
52
|
@header = Bcf::Header.new(@hts_file)
|
53
|
+
|
54
|
+
create_index(index) if create_index
|
55
|
+
|
56
|
+
@idx = load_index(index)
|
57
|
+
|
58
|
+
@start_position = tell
|
59
|
+
end
|
60
|
+
|
61
|
+
def create_index(index_name = nil)
|
62
|
+
warn "Create index for #{@file_name} to #{index_name}"
|
63
|
+
if index
|
64
|
+
LibHTS.bcf_index_build2(@hts_file, index_name, -1)
|
65
|
+
else
|
66
|
+
LibHTS.bcf_index_build(@hts_file, -1)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def load_index(index_name = nil)
|
71
|
+
if index_name
|
72
|
+
LibHTS.bcf_index_load2(@file_name, index_name)
|
73
|
+
else
|
74
|
+
LibHTS.bcf_index_load3(@file_name, nil, 2)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def index_loaded?
|
79
|
+
!@idx.null?
|
55
80
|
end
|
56
81
|
|
57
82
|
def write_header
|
83
|
+
raise IOError, "closed stream" if closed?
|
84
|
+
|
58
85
|
@header = header.dup
|
59
86
|
LibHTS.hts_set_fai_filename(header, @file_name)
|
60
87
|
LibHTS.bcf_hdr_write(@hts_file, header.struct)
|
61
88
|
end
|
62
89
|
|
63
90
|
def write(var)
|
91
|
+
raise IOError, "closed stream" if closed?
|
92
|
+
|
64
93
|
var_dup = var.dup = var.dup
|
65
94
|
LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
|
66
95
|
end
|
67
96
|
|
68
97
|
# Close the current file.
|
69
|
-
def close
|
70
|
-
LibHTS.hts_close(@hts_file)
|
71
|
-
@hts_file = nil
|
72
|
-
end
|
73
|
-
|
74
|
-
def closed?
|
75
|
-
@hts_file.nil?
|
76
|
-
end
|
77
98
|
|
78
99
|
def nsamples
|
79
100
|
header.nsamples
|
@@ -87,6 +108,8 @@ module HTS
|
|
87
108
|
# Generate a new Record object each time.
|
88
109
|
# Slower than each.
|
89
110
|
def each_copy
|
111
|
+
raise IOError, "closed stream" if closed?
|
112
|
+
|
90
113
|
return to_enum(__method__) unless block_given?
|
91
114
|
|
92
115
|
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
@@ -100,6 +123,8 @@ module HTS
|
|
100
123
|
# Record object is reused.
|
101
124
|
# Faster than each_copy.
|
102
125
|
def each
|
126
|
+
raise IOError, "closed stream" if closed?
|
127
|
+
|
103
128
|
return to_enum(__method__) unless block_given?
|
104
129
|
|
105
130
|
bcf1 = LibHTS.bcf_init
|
data/lib/hts/faidx.rb
CHANGED
data/lib/hts/ffi_ext/pointer.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FFI
|
4
|
+
class Pointer
|
5
|
+
unless method_defined?(:read_array_of_struct)
|
6
|
+
def read_array_of_struct(type, length)
|
7
|
+
ary = []
|
8
|
+
size = type.size
|
9
|
+
tmp = self
|
10
|
+
length.times do |j|
|
11
|
+
ary << type.new(tmp)
|
12
|
+
tmp += size unless j == length - 1 # avoid OOB
|
13
|
+
end
|
14
|
+
ary
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/hts/hts.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative "../htslib"
|
2
4
|
|
3
5
|
module HTS
|
@@ -24,7 +26,18 @@ module HTS
|
|
24
26
|
"#{major}.#{minor}"
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
29
|
+
|
30
|
+
def close
|
31
|
+
return if closed?
|
32
|
+
|
33
|
+
LibHTS.hts_close(@hts_file)
|
34
|
+
@hts_file = nil
|
35
|
+
end
|
36
|
+
|
37
|
+
def closed?
|
38
|
+
@hts_file.nil? || @hts_file.null?
|
39
|
+
end
|
40
|
+
|
28
41
|
def seek(offset)
|
29
42
|
if @hts_file[:is_cram] == 1
|
30
43
|
LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
|
@@ -48,9 +61,14 @@ module HTS
|
|
48
61
|
end
|
49
62
|
|
50
63
|
def rewind
|
51
|
-
|
52
|
-
|
53
|
-
|
64
|
+
if @start_position
|
65
|
+
r = seek(@start_position)
|
66
|
+
raise "Failed to rewind: #{r}" if r < 0
|
67
|
+
|
68
|
+
tell
|
69
|
+
else
|
70
|
+
raise "Cannot rewind: no start position"
|
71
|
+
end
|
54
72
|
end
|
55
73
|
end
|
56
74
|
end
|
data/lib/hts/libhts/constants.rb
CHANGED
@@ -36,7 +36,7 @@ module HTS
|
|
36
36
|
:offset, :size_t,
|
37
37
|
:_flags, :uint,
|
38
38
|
:has_errno, :int
|
39
|
-
|
39
|
+
|
40
40
|
bit_fields :_flags,
|
41
41
|
:at_eof, 1,
|
42
42
|
:mobile, 1,
|
@@ -479,8 +479,8 @@ module HTS
|
|
479
479
|
:id, :string,
|
480
480
|
:als, :pointer, # (\\0-separated string)
|
481
481
|
:allele, :pointer,
|
482
|
-
:info, :pointer, # BcfInfo.ptr,
|
483
|
-
:fmt, BcfFmt.ptr,
|
482
|
+
:info, :pointer, # array of BcfInfo.ptr,
|
483
|
+
:fmt, :pointer, # array of BcfFmt.ptr,
|
484
484
|
:var, BcfVariant.ptr,
|
485
485
|
:n_var, :int,
|
486
486
|
:var_type, :int,
|