htslib 0.0.1 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +68 -17
- data/lib/hts/bam/cigar.rb +9 -6
- data/lib/hts/bam/flag.rb +93 -0
- data/lib/hts/bam/header.rb +12 -6
- data/lib/hts/bam/record.rb +195 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +52 -0
- data/lib/hts/bcf/header.rb +19 -0
- data/lib/hts/bcf/info.rb +93 -0
- data/lib/hts/bcf/record.rb +110 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +2 -2
- data/lib/hts/{ffi → libhts}/constants.rb +144 -76
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +2 -2
- data/lib/hts/{ffi → libhts}/hts.rb +9 -3
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +60 -30
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +215 -12
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +32 -17
- metadata +49 -28
- data/lib/hts/bam/alignment.rb +0 -156
- data/lib/hts/fai.rb +0 -18
- data/lib/hts/ffi.rb +0 -43
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf.rb +0 -32
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
|
4
|
+
# This is a port from Nim.
|
5
|
+
# TODO: Make it more like Ruby.
|
6
|
+
|
7
|
+
module HTS
  class Bcf
    # Reads FORMAT values of one record through LibHTS.bcf_get_format_values.
    class Format
      # @param record [HTS::Bcf::Record] record whose FORMAT values are read
      def initialize(record)
        @record = record
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      # Fetch the values stored under +key+.
      #
      # @param key [String] FORMAT tag name
      # @param type [Symbol, String, nil] one of :int/:int32, :float/:real,
      #   :flag, :string/:str
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] the decoded
      #   values; nil when htslib reports an error (negative return code) or
      #   when +type+ is nil or not one of the supported names
      def get(key, type = nil)
        n = FFI::MemoryPointer.new(:int)
        p1 = @p1
        h = @record.bcf.header.struct
        r = @record.struct

        # A proc (not a lambda) on purpose: `return` leaves #get itself.
        format_values = proc do |ht_type|
          ret = LibHTS.bcf_get_format_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        # `&.` keeps the default `type = nil` from raising NoMethodError,
        # matching the behaviour of Info#get.
        case type&.to_sym
        when :int, :int32
          format_values.call(LibHTS::BCF_HT_INT)
                       .read_array_of_int32(n.read_int)
        when :float, :real
          format_values.call(LibHTS::BCF_HT_REAL)
                       .read_array_of_float(n.read_int)
        when :flag
          # NOTE(review): reads a single int from the result buffer - confirm
          # this matches how htslib encodes FORMAT flags.
          format_values.call(LibHTS::BCF_HT_FLAG)
                       .read_int == 1
        when :string, :str
          # NOTE(review): dereferences the buffer one more time before reading
          # the string - confirm against bcf_get_format_string semantics.
          format_values.call(LibHTS::BCF_HT_STR)
                       .read_pointer.read_string
        end
      end

      # Not implemented yet.
      def set; end

      # def fields # iterator
      # end

      # Not implemented yet.
      def genotypes; end
    end
  end
end
|
data/lib/hts/bcf/info.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # Reads INFO values of one record through the LibHTS bindings.
    class Info
      # @param record [HTS::Bcf::Record] record whose INFO values are read
      def initialize(record)
        @record = record
      end

      # Fetch the values stored under +key+.
      #
      # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
      # @param key [String] INFO tag name
      # @param type [Symbol, String, nil] :int/:int32, :float/:real,
      #   :flag/:bool or :string/:str; inferred from the record when omitted
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] decoded
      #   values; nil when the key is not present in the record, when htslib
      #   reports an error, or when +type+ is not a supported name
      # @raise [RuntimeError] on an unknown stored type code or an unexpected
      #   return value from bcf_get_info_flag
      def get(key, type = nil)
        n = FFI::MemoryPointer.new(:int)
        p1 = @record.p1
        h = @record.bcf.header.struct
        r = @record.struct

        # A proc (not a lambda) on purpose: `return` leaves #get itself.
        info_values = proc do |ht_type|
          ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        unless type
          vtype = get_info_type(key)
          # The key is not among this record's INFO fields: nothing to read.
          return nil if vtype.nil?

          type = info_type_to_string(vtype)
        end

        case type.to_sym
        when :int, :int32
          info_values.call(LibHTS::BCF_HT_INT)
                     .read_array_of_int32(n.read_int)
        when :float, :real
          info_values.call(LibHTS::BCF_HT_REAL)
                     .read_array_of_float(n.read_int)
        when :flag, :bool
          case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
          when 1 then true
          when 0 then false
          when -1 then nil
          else
            raise "Unknown return value from bcf_get_info_flag: #{ret}"
          end
        when :string, :str
          info_values.call(LibHTS::BCF_HT_STR)
                     .read_string
        end
      end

      # Describe every INFO field of the record.
      #
      # @return [Array<Hash>] one hash per field with the keys :name, :n,
      #   :vtype (stored type code) and :i (numeric key of the field)
      def fields
        hdr = @record.bcf.header.struct
        Array.new(@record.struct[:n_info]) do |i|
          fld = info_field(i)
          {
            name: LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]),
            n: LibHTS.bcf_hdr_id2number(hdr, LibHTS::BCF_HL_INFO, fld[:key]),
            vtype: fld[:type], i: fld[:key]
          }
        end
      end

      private

      # Wrap the +index+-th entry of the record's INFO array in a BcfInfo struct.
      def info_field(index)
        LibHTS::BcfInfo.new(
          @record.struct[:d][:info] + index * LibHTS::BcfInfo.size
        )
      end

      # Stored type code of the INFO field named +key+, or nil when the record
      # has no such field. The explicit nil matters: Integer#times would
      # otherwise hand back the field count, which looks like a type code.
      def get_info_type(key)
        hdr = @record.bcf.header.struct
        @record.struct[:n_info].times do |i|
          fld = info_field(i)
          id = LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key])
          return fld[:type] if id == key
        end
        nil
      end

      # Map a stored type code to the type name accepted by #get.
      def info_type_to_string(t)
        case t
        when 0 then :flag
        when 1, 2, 3, 4 then :int
        when 5 then :float
        when 7 then :string
        else
          raise "Unknown info type: #{t}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # One VCF/BCF record: wraps the bcf1 struct handed out by LibHTS together
    # with the Bcf file object it was read from.
    class Record
      # @param bcf_t [Object] bcf1 struct as returned by LibHTS.bcf_init
      # @param bcf [HTS::Bcf] the file this record belongs to
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return the underlying bcf1 struct
      def struct
        @bcf1
      end

      # @return pointer to this record's bcf1 struct (previously this wrongly
      #   returned the pointer of the enclosing file object)
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      # Not implemented yet.
      def formats; end

      # Not implemented yet.
      def genotypes; end

      # @return name looked up from the header for this record's :rid
      def chrom
        LibHTS.bcf_hdr_id2name(@bcf.header.struct, @bcf1[:rid])
      end

      # @return [Integer] stored position plus one
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] stored position, unchanged
      def start
        @bcf1[:pos]
      end

      # @return [Integer] stored position plus :rlen
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return the record's ID field
      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # Filter names of the record.
      #
      # @return [String, Array<String>] "PASS" when no filter is stored, the
      #   single filter name when there is one, otherwise an array with the
      #   name of every stored filter (any count of two or more)
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]

        case n_flt
        when 0
          "PASS"
        when 1
          filter_name(d[:flt].read_int)
        else
          d[:flt].get_array_of_int(0, n_flt).map { |i| filter_name(i) }
        end
      end

      # @return the record's :qual value
      def qual
        @bcf1[:qual]
      end

      # @return [String] the first allele
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
        ).map(&:read_string)
      end

      # @return [Array<String>] all alleles, first allele included
      def alleles
        # Unpack first, like #ref and #alt do, so the allele array is populated
        # even when this is the first accessor called on the record.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] accessor for this record's INFO fields
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] accessor for this record's FORMAT fields
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # @return the :s member of the KString filled by LibHTS.vcf_format
      # @raise [RuntimeError] when vcf_format returns -1
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1

        ksr[:s]
      end

      private

      # Translate a numeric filter id into its name via the header dictionary.
      def filter_name(index)
        LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, index)
      end
    end
  end
end
|
data/lib/hts/bcf.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
require_relative "bcf/header"
|
7
|
+
require_relative "bcf/info"
|
8
|
+
require_relative "bcf/format"
|
9
|
+
require_relative "bcf/record"
|
10
|
+
|
11
|
+
module HTS
  # Handle on a VCF/BCF file opened through LibHTS.hts_open. Enumerable over
  # the records of the file.
  class Bcf
    include Enumerable

    attr_reader :file_path, :mode, :header
    # HtfFile is FFI::BitStruct
    attr_reader :htf_file

    class << self
      alias_method :open, :new
    end

    # Open +file_path+ and read its header.
    #
    # When a block is given the new object is yielded to it and the file is
    # closed once the block finishes, whether it returns or raises.
    #
    # @param file_path [String] path of the file; expanded with File.expand_path
    # @param mode [String] mode string forwarded to LibHTS.hts_open
    # @raise [RuntimeError] when no file exists at the expanded path
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)
      raise "No such VCF/BCF file - #{file_path}" unless File.exist?(file_path)

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return the file struct returned by LibHTS.hts_open
    def struct
      htf_file
    end

    # @return pointer of the file struct
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Yield one Record per successful LibHTS.bcf_read call; iteration stops at
    # the first call that returns -1. A fresh bcf1 struct is obtained from
    # LibHTS.bcf_init for every read.
    #
    # @return [Enumerator] when called without a block
    # @return [self] when called with a block
    def each
      return to_enum(:each) unless block_given?

      until LibHTS.bcf_read(htf_file, header, (bcf1 = LibHTS.bcf_init)) == -1
        yield Record.new(bcf1, self)
      end
      self
    end

    # @return result of LibHTS.bcf_hdr_nsamples for this file's header
    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
|
data/lib/hts/faidx.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
module HTS
  # Handle on an index loaded with LibHTS.fai_load.
  class Faidx
    attr_reader :file_path

    class << self
      alias_method :open, :new
    end

    # Load the index for +file_path+.
    #
    # When a block is given the new object is yielded to it and the index is
    # released once the block finishes, whether it returns or raises.
    #
    # @param file_path [String] path handed to LibHTS.fai_load
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      # NOTE(review): fai_load receives the path as given, not the expanded
      # @file_path - confirm whether that is intended.
      @fai = LibHTS.fai_load(file_path)

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # Release the loaded index.
    def close
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name
    # @return [Integer, nil] nil when LibHTS.faidx_seq_len answers -1
    # @raise [ArgumentError] when +chrom+ is neither a String nor a Symbol
    def chrom_size(chrom)
      case chrom
      when String, Symbol
        seq_len = LibHTS.faidx_seq_len(@fai, chrom.to_s)
        seq_len unless seq_len == -1
      else
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    def cget; end

    # FIXME: naming and syntax
    def get; end

    # __iter__
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ffi/bit_field'
|
4
|
+
|
5
|
+
module FFI
  class Struct
    class << self
      # Build an anonymous FFI::Union subclass with the given layout, for use
      # as the type of a nested member inside another layout.
      #
      # @example HtsOpt
      #   class HtsOpt < FFI::Struct
      #     layout \
      #       :arg,            :string,
      #       :opt,            HtsFmtOption,
      #       :val,
      #       union_layout(
      #         :i,            :int,
      #         :s,            :string
      #       ),
      #       :next,           HtsOpt.ptr
      #   end
      def union_layout(*args)
        Class.new(FFI::Union).tap { |klass| klass.layout(*args) }
      end

      # Build an anonymous FFI::Struct subclass with the given layout, for use
      # as the type of a nested member inside another layout.
      #
      # @example HtsFormat
      #   class HtsFormat < FFI::Struct
      #     layout \
      #       :category,          HtsFormatCategory,
      #       :format,            HtsExactFormat,
      #       :version,
      #       struct_layout(
      #         :major,           :short,
      #         :minor,           :short
      #       ),
      #       :compression,       HtsCompression,
      #       :compression_level, :short,
      #       :specific,          :pointer
      #   end
      def struct_layout(*args)
        Class.new(FFI::Struct).tap { |klass| klass.layout(*args) }
      end
    end
  end
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# BGZF
|
4
4
|
module HTS
|
5
|
-
module
|
5
|
+
module LibHTS
|
6
6
|
# Open an existing file descriptor for reading or writing.
|
7
7
|
attach_function \
|
8
8
|
:bgzf_dopen,
|
@@ -120,7 +120,7 @@ module HTS
|
|
120
120
|
# Read one line from a BGZF file. It is faster than bgzf_getc()
|
121
121
|
attach_function \
|
122
122
|
:bgzf_getline,
|
123
|
-
[BGZF, :int,
|
123
|
+
[BGZF, :int, KString],
|
124
124
|
:int
|
125
125
|
|
126
126
|
# Read the next BGZF block.
|