htslib 0.0.10 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -16
- data/lib/hts/bam/aux.rb +39 -0
- data/lib/hts/bam/cigar.rb +0 -3
- data/lib/hts/bam/flag.rb +21 -57
- data/lib/hts/bam/header.rb +1 -4
- data/lib/hts/bam/record.rb +91 -47
- data/lib/hts/bam.rb +14 -15
- data/lib/hts/bcf/format.rb +82 -11
- data/lib/hts/bcf/info.rb +56 -31
- data/lib/hts/bcf/record.rb +43 -16
- data/lib/hts/bcf.rb +40 -15
- data/lib/hts/faidx.rb +0 -3
- data/lib/hts/ffi_ext/pointer.rb +18 -0
- data/lib/hts/hts.rb +22 -4
- data/lib/hts/libhts/constants.rb +3 -3
- data/lib/hts/libhts/cram.rb +287 -292
- data/lib/hts/libhts/vcf.rb +14 -0
- data/lib/hts/libhts.rb +4 -0
- data/lib/hts/{tabix.rb → tbx.rb} +4 -11
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +1 -1
- metadata +5 -3
data/lib/hts/bcf/info.rb
CHANGED
@@ -28,6 +28,10 @@ module HTS
|
|
28
28
|
get(key, :flag)
|
29
29
|
end
|
30
30
|
|
31
|
+
# Hash-style accessor: delegates to #get without passing a type.
#
# @param key the INFO key, forwarded unchanged to #get
# @return whatever #get returns for +key+
def [](key)
  get(key)
end
|
34
|
+
|
31
35
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
32
36
|
def get(key, type = nil)
|
33
37
|
n = FFI::MemoryPointer.new(:int)
|
@@ -35,14 +39,14 @@ module HTS
|
|
35
39
|
h = @record.header.struct
|
36
40
|
r = @record.struct
|
37
41
|
|
38
|
-
info_values = proc do |
|
39
|
-
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n,
|
42
|
+
info_values = proc do |typ|
|
43
|
+
ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
|
40
44
|
return nil if ret < 0 # return from method.
|
41
45
|
|
42
46
|
p1.read_pointer
|
43
47
|
end
|
44
48
|
|
45
|
-
type ||=
|
49
|
+
type ||= ht_type_to_sym(get_info_type(key))
|
46
50
|
|
47
51
|
case type&.to_sym
|
48
52
|
when :int, :int32
|
@@ -67,47 +71,68 @@ module HTS
|
|
67
71
|
|
68
72
|
# FIXME: naming? room for improvement.
|
69
73
|
# Describe every INFO entry of the record.
#
# For each numeric key id yielded by #keys the header is asked for the
# tag name, its "number" and its value type.
#
# @return [Array<Hash>] one hash per entry with the keys :name, :n,
#   :type (converted by #ht_type_to_sym) and :key (the numeric id)
def fields
  keys.map do |key_id|
    {
      name: LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key_id),
      n: LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key_id),
      type: ht_type_to_sym(
        LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key_id)
      ),
      key: key_id
    }
  end
end
|
87
86
|
|
87
|
+
# Number of INFO entries, read from the :n_info field of the record struct.
#
# @return the value stored in +@record.struct[:n_info]+
def length
  @record.struct[:n_info]
end
|
90
|
+
|
91
|
+
# Same value as #length; provided under the other conventional name.
#
# @return whatever #length returns
def size
  length
end
|
94
|
+
|
95
|
+
# Collect all INFO entries into a Hash.
#
# Each numeric key id from #keys is translated to its tag name through the
# header, and the value is fetched with #get (no explicit type).
#
# @return [Hash] tag name => value returned by #get
def to_h
  keys.each_with_object({}) do |key_id, table|
    tag = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key_id)
    table[tag] = get(tag)
  end
end
|
103
|
+
|
88
104
|
private
|
89
105
|
|
106
|
+
# Pointer to the record's INFO storage (+struct[:d][:info]+), obtained via #to_ptr.
def info_ptr
  @record.struct[:d][:info].to_ptr
end
|
109
|
+
|
110
|
+
# Numeric key ids of all INFO entries.
#
# Reads #length consecutive LibHTS::BcfInfo structs starting at #info_ptr
# and extracts the :key field of each.
#
# @return [Array] one :key value per INFO entry
def keys
  entries = info_ptr.read_array_of_struct(LibHTS::BcfInfo, length)
  entries.map { |entry| entry[:key] }
end
|
115
|
+
|
90
116
|
# Look up the header-declared value type of the INFO tag named +key+.
#
# Walks the record's INFO entries, translates each numeric key id to its
# tag name and compares it with +key+ using ==.
#
# @param key the tag name to search for
# @return the result of LibHTS.bcf_hdr_id2type for the first matching
#   entry, or nil when no entry matches
def get_info_type(key)
  @record.struct[:n_info].times do |idx|
    entry = LibHTS::BcfInfo.new(@record.struct[:d][:info] + idx * LibHTS::BcfInfo.size)
    key_id = entry[:key]
    tag = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key_id)
    next unless tag == key

    return LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key_id)
  end
  nil
end
|
102
128
|
|
103
|
-
def
|
129
|
+
# Translate a LibHTS::BCF_HT_* type constant into a Ruby symbol.
#
# @param t the type constant (as returned by #get_info_type)
# @return [Symbol, nil] :flag, :int, :float or :string; nil when +t+
#   matches none of the constants below (there is no else branch)
def ht_type_to_sym(t)
  case t
  when LibHTS::BCF_HT_FLAG
    :flag
  when LibHTS::BCF_HT_INT
    :int
  when LibHTS::BCF_HT_REAL
    :float
  when LibHTS::BCF_HT_STR
    :string
  when LibHTS::BCF_HT_LONG
    # NOTE(review): BCF_HT_LONG maps to :float, same as BCF_HT_REAL —
    # confirm this is intended rather than an integer type.
    :float
  end
end
|
113
138
|
end
|
data/lib/hts/bcf/record.rb
CHANGED
@@ -18,35 +18,48 @@ module HTS
|
|
18
18
|
@bcf1.to_ptr
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
21
|
+
# Get the reference id of the record.
|
22
|
+
# Reads the :rid field of the underlying record struct.
#
# @return the value stored in +@bcf1[:rid]+
def rid
  @bcf1[:rid]
end
|
24
25
|
|
25
|
-
def
|
26
|
+
# Store a new reference id in the :rid field of the underlying record struct.
#
# @param rid the value to assign to +@bcf1[:rid]+
def rid=(rid)
  @bcf1[:rid] = rid
end
|
26
29
|
|
30
|
+
# Get the chromosome of variant.
|
27
31
|
# Resolve the record's reference id (#rid) to a name through the header.
#
# @return the result of LibHTS.bcf_hdr_id2name for this record's #rid
def chrom
  LibHTS.bcf_hdr_id2name(@header.struct, rid)
end
|
32
34
|
|
35
|
+
# Return 0-based position.
|
33
36
|
# Reads the :pos field of the underlying record struct.
#
# @return the value stored in +@bcf1[:pos]+
def pos
  @bcf1[:pos]
end
|
36
39
|
|
37
|
-
def
|
38
|
-
@bcf1[:pos]
|
40
|
+
# Store a new position in the :pos field of the underlying record struct.
#
# @param pos the value to assign to +@bcf1[:pos]+
def pos=(pos)
  @bcf1[:pos] = pos
end
|
40
43
|
|
41
|
-
|
42
|
-
|
44
|
+
# Return the 0-based, exclusive end position
|
45
|
+
# End coordinate computed as #pos plus the struct's :rlen field.
#
# @return the sum of #pos and +@bcf1[:rlen]+
def endpos
  pos + @bcf1[:rlen]
end
|
44
48
|
|
49
|
+
# Return the value of the ID column.
|
45
50
|
# Unpack the record (LibHTS::BCF_UN_INFO) and read the ID field.
#
# @return the value stored in +@bcf1[:d][:id]+ after unpacking
def id
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
  @bcf1[:d][:id]
end
|
49
54
|
|
55
|
+
# Replace the record's ID through LibHTS.bcf_update_id.
#
# @param id the new ID value handed to LibHTS.bcf_update_id
def id=(id)
  LibHTS.bcf_update_id(@header, @bcf1, id)
end
|
58
|
+
|
59
|
+
# Reset the record's ID to "." through LibHTS.bcf_update_id.
#
# @return the value returned by LibHTS.bcf_update_id
def clear_id
  LibHTS.bcf_update_id(@header, @bcf1, ".")
end
|
62
|
+
|
50
63
|
def filter
|
51
64
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
|
52
65
|
d = @bcf1[:d]
|
@@ -58,7 +71,7 @@ module HTS
|
|
58
71
|
when 1
|
59
72
|
i = d[:flt].read_int
|
60
73
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
61
|
-
when 2
|
74
|
+
when 2..nil
|
62
75
|
d[:flt].get_array_of_int(0, n_flt).map do |i|
|
63
76
|
LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
|
64
77
|
end
|
@@ -67,10 +80,15 @@ module HTS
|
|
67
80
|
end
|
68
81
|
end
|
69
82
|
|
83
|
+
# Get variant quality.
|
70
84
|
# Reads the :qual field of the underlying record struct.
#
# @return the value stored in +@bcf1[:qual]+
def qual
  @bcf1[:qual]
end
|
73
87
|
|
88
|
+
# Store a new quality value in the :qual field of the underlying record struct.
#
# @param qual the value to assign to +@bcf1[:qual]+
def qual=(qual)
  @bcf1[:qual] = qual
end
|
91
|
+
|
74
92
|
def ref
|
75
93
|
LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
|
76
94
|
@bcf1[:d][:allele].get_pointer(0).read_string
|
@@ -90,14 +108,23 @@ module HTS
|
|
90
108
|
).map(&:read_string)
|
91
109
|
end
|
92
110
|
|
93
|
-
def info
|
111
|
+
# Access the record's INFO data.
#
# The record is unpacked with LibHTS::BCF_UN_SHR before an Info wrapper is built.
#
# @param key optional INFO key; when given, only that value is looked up
# @return the Info wrapper when +key+ is nil/false, otherwise the result
#   of Info#get for +key+
def info(key = nil)
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
  wrapper = Info.new(self)
  key ? wrapper.get(key) : wrapper
end
|
97
120
|
|
98
|
-
def format
|
121
|
+
# Access the record's FORMAT data.
#
# The record is unpacked with LibHTS::BCF_UN_FMT before a Format wrapper is built.
#
# @param key optional FORMAT key; when given, only that value is looked up
# @return the Format wrapper when +key+ is nil/false, otherwise the result
#   of Format#get for +key+
def format(key = nil)
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
  wrapper = Format.new(self)
  key ? wrapper.get(key) : wrapper
end
|
102
129
|
|
103
130
|
def to_s
|
data/lib/hts/bcf.rb
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Based on hts-python
|
4
|
-
# https://github.com/quinlan-lab/hts-python
|
5
|
-
|
6
3
|
require_relative "../htslib"
|
7
4
|
|
8
5
|
require_relative "hts"
|
@@ -15,7 +12,7 @@ module HTS
|
|
15
12
|
class Bcf < Hts
|
16
13
|
include Enumerable
|
17
14
|
|
18
|
-
attr_reader :file_name, :
|
15
|
+
attr_reader :file_name, :index_name, :mode, :header
|
19
16
|
|
20
17
|
def self.open(*args, **kw)
|
21
18
|
file = new(*args, **kw) # do not yield
|
@@ -38,9 +35,10 @@ module HTS
|
|
38
35
|
|
39
36
|
# NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
|
40
37
|
|
41
|
-
@file_name
|
42
|
-
@
|
43
|
-
@
|
38
|
+
@file_name = file_name
|
39
|
+
@index_name = index
|
40
|
+
@mode = mode
|
41
|
+
@hts_file = LibHTS.hts_open(@file_name, mode)
|
44
42
|
|
45
43
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
46
44
|
|
@@ -52,28 +50,51 @@ module HTS
|
|
52
50
|
return if @mode[0] == "w"
|
53
51
|
|
54
52
|
@header = Bcf::Header.new(@hts_file)
|
53
|
+
|
54
|
+
create_index(index) if create_index
|
55
|
+
|
56
|
+
@idx = load_index(index)
|
57
|
+
|
58
|
+
@start_position = tell
|
59
|
+
end
|
60
|
+
|
61
|
+
# Build an index for the opened file and announce it on stderr.
#
# @param index_name [String, nil] explicit index file name. When given,
#   LibHTS.bcf_index_build2 is called with it; otherwise
#   LibHTS.bcf_index_build is called without a name.
# @return the value returned by the LibHTS build call
def create_index(index_name = nil)
  warn "Create index for #{@file_name} to #{index_name}"
  # Branch on the parameter itself. The previous code tested `index`, which
  # is not a local of this method, so the call failed before building anything.
  if index_name
    LibHTS.bcf_index_build2(@hts_file, index_name, -1)
  else
    LibHTS.bcf_index_build(@hts_file, -1)
  end
end
|
69
|
+
|
70
|
+
# Load the index that belongs to @file_name.
#
# @param index_name [String, nil] explicit index file name. When given,
#   LibHTS.bcf_index_load2 is used; otherwise LibHTS.bcf_index_load3 is
#   called with a nil name and flags = 2.
#   NOTE(review): 2 is presumably HTS_IDX_SILENT_FAIL — confirm against hts.h.
# @return the value returned by the LibHTS load call
def load_index(index_name = nil)
  return LibHTS.bcf_index_load2(@file_name, index_name) if index_name

  LibHTS.bcf_index_load3(@file_name, nil, 2)
end
|
77
|
+
|
78
|
+
# Whether an index is available, i.e. @idx is not a null pointer.
#
# @return [Boolean] the negation of +@idx.null?+
def index_loaded?
  !@idx.null?
end
|
56
81
|
|
57
82
|
# Write the header to the opened file.
#
# @raise [IOError] when the file has already been closed
# @return the value returned by LibHTS.bcf_hdr_write
def write_header
  raise IOError, "closed stream" if closed?

  # Keep a private copy of the header on this object.
  @header = header.dup
  # NOTE(review): the header is passed as first argument here; htslib's
  # hts_set_fai_filename is documented as taking the file handle — confirm.
  LibHTS.hts_set_fai_filename(header, @file_name)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
|
62
89
|
|
63
90
|
# Write one record to the opened file.
#
# The record is duplicated first and the copy is handed to LibHTS.bcf_write.
#
# @param var the record to write (must respond to #dup)
# @raise [IOError] when the file has already been closed
# @raise [RuntimeError] when LibHTS.bcf_write reports a negative status
# @return [true] on success
def write(var)
  raise IOError, "closed stream" if closed?

  # Plain copy; the previous `var.dup = var.dup` tried to call a `dup=` setter.
  var_dup = var.dup
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  # htslib documents bcf_write as returning 0 on success and a negative
  # value on error, so 0 must not be treated as a failure.
  raise "Failed to write record" if status < 0

  true
end
|
67
96
|
|
68
97
|
# Close the current file.
|
69
|
-
def close
|
70
|
-
LibHTS.hts_close(@hts_file)
|
71
|
-
@hts_file = nil
|
72
|
-
end
|
73
|
-
|
74
|
-
def closed?
|
75
|
-
@hts_file.nil?
|
76
|
-
end
|
77
98
|
|
78
99
|
def nsamples
|
79
100
|
header.nsamples
|
@@ -87,6 +108,8 @@ module HTS
|
|
87
108
|
# Generate a new Record object each time.
|
88
109
|
# Slower than each.
|
89
110
|
def each_copy
|
111
|
+
raise IOError, "closed stream" if closed?
|
112
|
+
|
90
113
|
return to_enum(__method__) unless block_given?
|
91
114
|
|
92
115
|
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
@@ -100,6 +123,8 @@ module HTS
|
|
100
123
|
# Record object is reused.
|
101
124
|
# Faster than each_copy.
|
102
125
|
def each
|
126
|
+
raise IOError, "closed stream" if closed?
|
127
|
+
|
103
128
|
return to_enum(__method__) unless block_given?
|
104
129
|
|
105
130
|
bcf1 = LibHTS.bcf_init
|
data/lib/hts/faidx.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module FFI
  class Pointer
    # Only define the helper when FFI::Pointer does not already provide it.
    unless method_defined?(:read_array_of_struct)
      # Wrap +length+ consecutive structs that start at this pointer.
      #
      # @param type a struct class responding to .size and .new(pointer)
      # @param length [Integer] number of elements to wrap
      # @return [Array] +length+ instances of +type+; the first wraps this
      #   pointer itself, each following one is +type.size+ further on
      def read_array_of_struct(type, length)
        stride = type.size
        cursor = self
        length.times.map do |idx|
          element = type.new(cursor)
          # Do not step past the final element, so a pointer beyond the
          # array is never created (avoids an out-of-bounds pointer).
          cursor += stride unless idx == length - 1
          element
        end
      end
    end
  end
end
|
data/lib/hts/hts.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative "../htslib"
|
2
4
|
|
3
5
|
module HTS
|
@@ -24,7 +26,18 @@ module HTS
|
|
24
26
|
"#{major}.#{minor}"
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
29
|
+
|
30
|
+
# Close the underlying file handle.
#
# Does nothing when #closed? is already true; otherwise the handle is
# passed to LibHTS.hts_close and then cleared.
#
# @return [nil]
def close
  return if closed?

  LibHTS.hts_close(@hts_file)
  @hts_file = nil
end
|
36
|
+
|
37
|
+
# Whether the file handle is gone: either cleared to nil or a null pointer.
#
# @return [Boolean]
def closed?
  @hts_file.nil? || @hts_file.null?
end
|
40
|
+
|
28
41
|
def seek(offset)
|
29
42
|
if @hts_file[:is_cram] == 1
|
30
43
|
LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
|
@@ -48,9 +61,14 @@ module HTS
|
|
48
61
|
end
|
49
62
|
|
50
63
|
# Seek back to the position remembered in @start_position.
#
# @raise [RuntimeError] when no start position was recorded, or when
#   #seek returns a negative value
# @return the value of #tell after the seek
def rewind
  raise "Cannot rewind: no start position" unless @start_position

  status = seek(@start_position)
  raise "Failed to rewind: #{status}" if status < 0

  tell
end
|
55
73
|
end
|
56
74
|
end
|
data/lib/hts/libhts/constants.rb
CHANGED
@@ -36,7 +36,7 @@ module HTS
|
|
36
36
|
:offset, :size_t,
|
37
37
|
:_flags, :uint,
|
38
38
|
:has_errno, :int
|
39
|
-
|
39
|
+
|
40
40
|
bit_fields :_flags,
|
41
41
|
:at_eof, 1,
|
42
42
|
:mobile, 1,
|
@@ -479,8 +479,8 @@ module HTS
|
|
479
479
|
:id, :string,
|
480
480
|
:als, :pointer, # (\\0-separated string)
|
481
481
|
:allele, :pointer,
|
482
|
-
:info, :pointer, # BcfInfo.ptr,
|
483
|
-
:fmt, BcfFmt.ptr,
|
482
|
+
:info, :pointer, # array of BcfInfo.ptr,
|
483
|
+
:fmt, :pointer, # array of BcfFmt.ptr,
|
484
484
|
:var, BcfVariant.ptr,
|
485
485
|
:n_var, :int,
|
486
486
|
:var_type, :int,
|