htslib 0.0.1 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +68 -17
- data/lib/hts/bam/cigar.rb +9 -6
- data/lib/hts/bam/flag.rb +93 -0
- data/lib/hts/bam/header.rb +12 -6
- data/lib/hts/bam/record.rb +195 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +52 -0
- data/lib/hts/bcf/header.rb +19 -0
- data/lib/hts/bcf/info.rb +93 -0
- data/lib/hts/bcf/record.rb +110 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +2 -2
- data/lib/hts/{ffi → libhts}/constants.rb +144 -76
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +2 -2
- data/lib/hts/{ffi → libhts}/hts.rb +9 -3
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +60 -30
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +215 -12
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +32 -17
- metadata +49 -28
- data/lib/hts/bam/alignment.rb +0 -156
- data/lib/hts/fai.rb +0 -18
- data/lib/hts/ffi.rb +0 -43
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf.rb +0 -32
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
|
4
|
+
# This is a port from Nim.
|
5
|
+
# TODO: Make it more like Ruby.
|
6
|
+
|
7
|
+
module HTS
  class Bcf
    # Reader for the FORMAT (per-sample) fields of one BCF/VCF record.
    #
    # Ported from hts-nim (src/hts/vcf.nim).
    class Format
      # @param record [HTS::Bcf::Record] record whose FORMAT fields are read;
      #   it must respond to +bcf+ (owning file) and +struct+ (bcf1 struct).
      def initialize(record)
        @record = record
        # Pointer-sized slot passed to htslib as the +dst+ out-parameter; the
        # result buffer address is read back from it after each call.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      # Fetch the values stored under a FORMAT tag.
      #
      # @param key [String] FORMAT tag name.
      # @param type [Symbol, String] value type: +:int+/+:int32+,
      #   +:float+/+:real+, +:flag+ or +:string+/+:str+.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] the decoded
      #   values; nil when htslib returns a negative status or when +type+ is
      #   not one of the recognised names.
      # @raise [ArgumentError] if +type+ is omitted (previously this surfaced
      #   as a NoMethodError on nil).
      def get(key, type = nil)
        if type.nil?
          raise ArgumentError,
                "type is required (one of :int, :float, :flag, :string)"
        end

        case type.to_sym
        when :int, :int32
          buf, count = format_values(key, LibHTS::BCF_HT_INT)
          buf&.read_array_of_int32(count)
        when :float, :real
          buf, count = format_values(key, LibHTS::BCF_HT_REAL)
          buf&.read_array_of_float(count)
        when :flag
          buf, = format_values(key, LibHTS::BCF_HT_FLAG)
          buf && buf.read_int == 1
        when :string, :str
          buf, = format_values(key, LibHTS::BCF_HT_STR)
          # NOTE(review): this dereferences the result buffer a second time
          # before reading the string. Info#get reads the string directly from
          # the buffer; confirm against htslib which form is right for FORMAT.
          buf && buf.read_pointer.read_string
        end
      end

      def set; end

      # def fields # iterator
      # end

      def genotypes; end

      private

      # Call bcf_get_format_values for +key+ with the given BCF_HT_* type.
      #
      # @return [Array(FFI::Pointer, Integer), nil] the result buffer and the
      #   element count htslib wrote into the count out-parameter, or nil when
      #   the call returned a negative status.
      def format_values(key, ht_type)
        count = FFI::MemoryPointer.new(:int)
        ret = LibHTS.bcf_get_format_values(
          @record.bcf.header.struct, @record.struct, key, @p1, count, ht_type
        )
        return nil if ret.negative?

        [@p1.read_pointer, count.read_int]
      end
    end
  end
end
|
data/lib/hts/bcf/info.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # Reader for the INFO fields of one BCF/VCF record.
    class Info
      # @param record [HTS::Bcf::Record] record whose INFO fields are read; it
      #   must respond to +bcf+, +struct+ and +p1+.
      def initialize(record)
        @record = record
      end

      # Fetch the value(s) stored under an INFO tag.
      #
      # @note Specify the type. If you don't specify a type, it will still
      #   work, but it will be slower (the record's fields are scanned to find
      #   the stored type).
      # @param key [String] INFO tag name.
      # @param type [Symbol, String, nil] +:int+/+:int32+, +:float+/+:real+,
      #   +:flag+/+:bool+ or +:string+/+:str+; nil to auto-detect.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] the decoded
      #   value; nil when htslib returns a negative status, when +type+ is not
      #   recognised, or when +type+ is omitted and the record has no such key.
      # @raise [RuntimeError] on an unexpected flag status or an unknown
      #   stored type code.
      def get(key, type = nil)
        if type.nil?
          raw_type = get_info_type(key)
          # Without a stored field there is no type to detect.
          return nil if raw_type.nil?

          type = info_type_to_string(raw_type)
        end

        case type.to_sym
        when :int, :int32
          buf, count = info_values(key, LibHTS::BCF_HT_INT)
          buf&.read_array_of_int32(count)
        when :float, :real
          buf, count = info_values(key, LibHTS::BCF_HT_REAL)
          buf&.read_array_of_float(count)
        when :flag, :bool
          info_flag(key)
        when :string, :str
          buf, = info_values(key, LibHTS::BCF_HT_STR)
          buf&.read_string
        end
      end

      # Describe every INFO field stored in the record.
      #
      # @return [Array<Hash>] one hash per field with +:name+ (from
      #   bcf_hdr_int2id), +:n+ (from bcf_hdr_id2number), +:vtype+ (the field's
      #   stored type code) and +:i+ (the field's key id).
      def fields
        hdr = @record.bcf.header.struct
        info_fields.map do |fld|
          {
            name: LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]),
            n: LibHTS.bcf_hdr_id2number(hdr, LibHTS::BCF_HL_INFO, fld[:key]),
            vtype: fld[:type], i: fld[:key]
          }
        end
      end

      private

      # Wrap each entry of the record's info array in a LibHTS::BcfInfo.
      def info_fields
        rec = @record.struct
        base = rec[:d][:info]
        Array.new(rec[:n_info]) do |i|
          LibHTS::BcfInfo.new(base + i * LibHTS::BcfInfo.size)
        end
      end

      # Call bcf_get_info_values for +key+ with the given BCF_HT_* type.
      #
      # @return [Array(FFI::Pointer, Integer), nil] result buffer and the value
      #   of the count out-parameter, or nil on a negative status.
      def info_values(key, ht_type)
        count = FFI::MemoryPointer.new(:int)
        dst = @record.p1
        ret = LibHTS.bcf_get_info_values(
          @record.bcf.header.struct, @record.struct, key, dst, count, ht_type
        )
        return nil if ret.negative?

        [dst.read_pointer, count.read_int]
      end

      # Translate the status of bcf_get_info_flag into true/false/nil.
      def info_flag(key)
        count = FFI::MemoryPointer.new(:int)
        ret = LibHTS.bcf_get_info_flag(
          @record.bcf.header.struct, @record.struct, key, @record.p1, count
        )
        case ret
        when 1 then true
        when 0 then false
        when -1 then nil
        else
          raise "Unknown return value from bcf_get_info_flag: #{ret}"
        end
      end

      # Stored type code of the INFO field named +key+.
      #
      # @return [Integer, nil] nil when the record has no such field. (An
      #   Integer#times loop would otherwise fall through and return the field
      #   count, which is not a type code.)
      def get_info_type(key)
        hdr = @record.bcf.header.struct
        found = info_fields.find do |fld|
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]) == key
        end
        found && found[:type]
      end

      # Map a stored type code to the symbolic type name used by #get.
      #
      # @raise [RuntimeError] for a code not listed below.
      def info_type_to_string(t)
        case t
        when 0 then :flag
        when 1, 2, 3, 4 then :int
        when 5 then :float
        when 7 then :string
        else
          raise "Unknown info type: #{t}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # One BCF/VCF record, wrapping the bcf1 struct read from a Bcf file.
    class Record
      # @param bcf_t [Object] the bcf1 struct for this record (indexable by
      #   field name).
      # @param bcf [HTS::Bcf] the file the record was read from.
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        # Pointer-sized out-parameter slot shared with Info for value lookups.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return [Object] the underlying bcf1 struct.
      def struct
        @bcf1
      end

      # Pointer to this record's own struct (not to the owning file handle).
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      def formats; end

      def genotypes; end

      # @return [String] name looked up from the header for the record's rid.
      def chrom
        LibHTS.bcf_hdr_id2name(@bcf.header.struct, @bcf1[:rid])
      end

      # @return [Integer] the stored position plus one.
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the stored position as is.
      def start
        @bcf1[:pos]
      end

      # @return [Integer] stored position plus rlen.
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return [String] the record ID field.
      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # Filter name(s) applied to the record.
      #
      # @return [String, Array<String>] "PASS" when no filter id is stored, a
      #   single name for one filter, and an array of names for two or more.
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]
        hdr = @bcf.header.struct

        case n_flt
        when 0
          "PASS"
        when 1
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, d[:flt].read_int)
        else
          # Any count above one is returned as a list; stopping at exactly two
          # would drop records carrying three or more filters.
          d[:flt].get_array_of_int(0, n_flt).map do |flt_id|
            LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, flt_id)
          end
        end
      end

      # @return [Float] the record's qual field.
      def qual
        @bcf1[:qual]
      end

      # @return [String] the first allele.
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first.
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
        ).map(&:read_string)
      end

      # @return [Array<String>] all alleles, first allele included.
      def alleles
        # Unpack first, as #ref and #alt do, so the allele array is populated
        # even when this is the first accessor called on the record.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] INFO accessor for this record.
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] FORMAT accessor for this record.
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Render the record as VCF text via vcf_format.
      #
      # @raise [RuntimeError] when vcf_format returns -1.
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1

        ksr[:s]
      end
    end
  end
end
|
data/lib/hts/bcf.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
require_relative "bcf/header"
|
7
|
+
require_relative "bcf/info"
|
8
|
+
require_relative "bcf/format"
|
9
|
+
require_relative "bcf/record"
|
10
|
+
|
11
|
+
module HTS
  # Handle on a VCF/BCF file; enumerates its records.
  class Bcf
    include Enumerable

    attr_reader :file_path, :mode, :header
    # The handle returned by hts_open (HtsFile is FFI::BitStruct).
    attr_reader :htf_file

    class << self
      alias open new
    end

    # Open +file_path+ and read its header.
    #
    # When a block is given the instance is yielded to it and the file is
    # closed once the block finishes, whether or not it raised.
    #
    # @param file_path [String] path to the file; expanded before use.
    # @param mode [String] mode string handed to hts_open.
    # @raise [RuntimeError] if nothing exists at the expanded path.
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)
      raise "No such VCF/BCF file - #{file_path}" unless File.exist?(file_path)

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return [Object] the underlying file handle (same object as #htf_file).
    def struct
      htf_file
    end

    # @return [Object] whatever the file handle's own to_ptr returns.
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Yield a Record for each successful bcf_read, stopping when it returns -1.
    #
    # A fresh bcf_init result backs every read, so each yielded record wraps
    # its own struct.
    #
    # @return [Enumerator] when no block is given.
    # @return [self] after iteration.
    def each
      return enum_for(:each) unless block_given?

      until LibHTS.bcf_read(htf_file, header, (bcf1 = LibHTS.bcf_init)) == -1
        yield Record.new(bcf1, self)
      end
      self
    end

    # @return [Integer] sample count reported by bcf_hdr_nsamples for the header.
    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
|
data/lib/hts/faidx.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
module HTS
  # Handle on a FASTA index loaded through fai_load.
  class Faidx
    attr_reader :file_path

    class << self
      alias open new
    end

    # Load the index for +file_path+.
    #
    # When a block is given the instance is yielded to it and the index is
    # destroyed once the block finishes, whether or not it raised.
    #
    # @param file_path [String] path to the FASTA file; expanded before use.
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      # Load from the expanded path so that #file_path and the file actually
      # opened always agree (e.g. for "~/ref.fa").
      @fai = LibHTS.fai_load(@file_path)

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # Release the loaded index via fai_destroy.
    def close
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name.
    # @return [Integer, nil] nil when faidx_seq_len reports -1.
    # @raise [ArgumentError] if +chrom+ is neither a String nor a Symbol.
    def chrom_size(chrom)
      unless chrom.is_a?(String) || chrom.is_a?(Symbol)
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end

      chrom = chrom.to_s
      result = LibHTS.faidx_seq_len(@fai, chrom)
      result == -1 ? nil : result
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    def cget; end

    # FIXME: naming and syntax
    def get; end

    # __iter__
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ffi/bit_field'
|
4
|
+
|
5
|
+
module FFI
  class Struct
    class << self
      # Build an anonymous FFI::Union subclass with the given layout, for use
      # inline as a field type inside another layout.
      #
      # @example HtsOpt
      #   class HtsOpt < FFI::Struct
      #     layout \
      #       :arg,    :string,
      #       :opt,    HtsFmtOption,
      #       :val,
      #       union_layout(
      #         :i,    :int,
      #         :s,    :string
      #       ),
      #       :next,   HtsOpt.ptr
      #   end
      #
      # @param args layout specification, forwarded unchanged to +layout+.
      # @return [Class] the new anonymous FFI::Union subclass.
      def union_layout(*args)
        Class.new(FFI::Union).tap { |union| union.layout(*args) }
      end

      # Build an anonymous FFI::Struct subclass with the given layout, for use
      # inline as a field type inside another layout.
      #
      # @example HtsFormat
      #   class HtsFormat < FFI::Struct
      #     layout \
      #       :category,          HtsFormatCategory,
      #       :format,            HtsExactFormat,
      #       :version,
      #       struct_layout(
      #         :major,           :short,
      #         :minor,           :short
      #       ),
      #       :compression,       HtsCompression,
      #       :compression_level, :short,
      #       :specific,          :pointer
      #   end
      #
      # @param args layout specification, forwarded unchanged to +layout+.
      # @return [Class] the new anonymous FFI::Struct subclass.
      def struct_layout(*args)
        Class.new(FFI::Struct).tap { |nested| nested.layout(*args) }
      end
    end
  end
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# BGZF
|
4
4
|
module HTS
|
5
|
-
module
|
5
|
+
module LibHTS
|
6
6
|
# Open an existing file descriptor for reading or writing.
|
7
7
|
attach_function \
|
8
8
|
:bgzf_dopen,
|
@@ -120,7 +120,7 @@ module HTS
|
|
120
120
|
# Read one line from a BGZF file. It is faster than bgzf_getc()
|
121
121
|
attach_function \
|
122
122
|
:bgzf_getline,
|
123
|
-
[BGZF, :int,
|
123
|
+
[BGZF, :int, KString],
|
124
124
|
:int
|
125
125
|
|
126
126
|
# Read the next BGZF block.
|