htslib 0.0.0 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +91 -12
- data/lib/hts/bam/cigar.rb +33 -0
- data/lib/hts/bam/flag.rb +93 -0
- data/lib/hts/bam/header.rb +33 -0
- data/lib/hts/bam/record.rb +176 -0
- data/lib/hts/bam.rb +109 -82
- data/lib/hts/bcf/format.rb +52 -0
- data/lib/hts/bcf/header.rb +19 -0
- data/lib/hts/bcf/info.rb +40 -0
- data/lib/hts/bcf/record.rb +116 -0
- data/lib/hts/bcf.rb +75 -0
- data/lib/hts/faidx.rb +50 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +2 -2
- data/lib/hts/{ffi_constants.rb → libhts/constants.rb} +137 -75
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +2 -2
- data/lib/hts/{ffi → libhts}/hts.rb +9 -3
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +71 -31
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +188 -12
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +20 -0
- data/lib/hts/utils/open_method.rb +17 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +34 -7
- metadata +60 -22
- data/lib/hts/fai.rb +0 -16
- data/lib/hts/ffi/struct.rb +0 -14
- data/lib/hts/ffi.rb +0 -32
- data/lib/hts/tbx.rb +0 -14
- data/lib/hts/vcf.rb +0 -30
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
|
4
|
+
# This is a port from Nim.
|
5
|
+
# TODO: Make it more like Ruby.
|
6
|
+
|
7
|
+
module HTS
  class Bcf
    # Reads FORMAT values of a single record through
    # LibHTS.bcf_get_format_values.
    class Format
      # @param record [HTS::Bcf::Record] record whose FORMAT fields are read;
      #   it must respond to +bcf+ and +struct+.
      def initialize(record)
        @record = record
        # Pointer-sized slot passed to bcf_get_format_values as the destination argument.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      # Fetch the values stored under a FORMAT tag.
      #
      # @param key [String] FORMAT tag name
      # @param type [Symbol, String] one of :int / :int32, :float / :real,
      #   :flag, :string / :str. Required, although the signature keeps the
      #   historical +nil+ default for compatibility.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil]
      #   nil when the library call returns a negative code or when +type+
      #   is not one of the names above
      # @raise [ArgumentError] if +type+ is omitted
      def get(key, type = nil)
        # Omitting the type used to end in `nil.to_sym` (NoMethodError).
        # Fail early with a message that names the accepted values.
        raise ArgumentError, "type is required: :int, :float, :flag or :string" if type.nil?

        n = FFI::MemoryPointer.new(:int)
        p1 = @p1
        h = @record.bcf.header.struct
        r = @record.struct

        # Deliberately a proc, not a lambda: `return` here leaves #get itself.
        format_values = proc do |ht_type|
          ret = LibHTS.bcf_get_format_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        case type.to_sym
        when :int, :int32
          format_values.call(LibHTS::BCF_HT_INT)
                       .read_array_of_int32(n.read_int)
        when :float, :real
          format_values.call(LibHTS::BCF_HT_REAL)
                       .read_array_of_float(n.read_int)
        when :flag
          # NOTE(review): reads an int out of the destination buffer; confirm
          # that htslib actually writes one for BCF_HT_FLAG.
          format_values.call(LibHTS::BCF_HT_FLAG)
                       .read_int == 1
        when :string, :str
          # NOTE(review): dereferences the destination buffer a second time
          # before reading the string; confirm against the layout htslib
          # uses for BCF_HT_STR FORMAT data.
          format_values.call(LibHTS::BCF_HT_STR)
                       .read_pointer.read_string
        end
      end

      # Not implemented yet.
      def set; end

      # def fields # iterator
      # end

      # Not implemented yet.
      def genotypes; end
    end
  end
end
|
data/lib/hts/bcf/info.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # Reads INFO values of a single record through
    # LibHTS.bcf_get_info_values.
    class Info
      # @param record [HTS::Bcf::Record] record whose INFO fields are read;
      #   it must respond to +p1+, +bcf+ and +struct+.
      def initialize(record)
        @record = record
      end

      # Fetch the value(s) stored under an INFO tag.
      #
      # @param key [String] INFO tag name
      # @param type [Symbol, String] one of :int / :int32, :float / :real,
      #   :flag, :string / :str. Required, although the signature keeps the
      #   historical +nil+ default for compatibility.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil]
      #   nil when the library call returns a negative code or when +type+
      #   is not one of the names above
      # @raise [ArgumentError] if +type+ is omitted
      def get(key, type = nil)
        # Omitting the type used to end in `nil.to_sym` (NoMethodError).
        # Fail early with a message that names the accepted values.
        raise ArgumentError, "type is required: :int, :float, :flag or :string" if type.nil?

        n = FFI::MemoryPointer.new(:int)
        p1 = @record.p1
        h = @record.bcf.header.struct
        r = @record.struct

        # Deliberately a proc, not a lambda: `return` here leaves #get itself.
        info_values = proc do |ht_type|
          ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        case type.to_sym
        when :int, :int32
          info_values.call(LibHTS::BCF_HT_INT)
                     .read_array_of_int32(n.read_int)
        when :float, :real
          info_values.call(LibHTS::BCF_HT_REAL)
                     .read_array_of_float(n.read_int)
        when :flag
          # Per the htslib documentation a flag lookup stores nothing in the
          # destination buffer and reports presence through the return code,
          # so the buffer must not be dereferenced here.
          ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, LibHTS::BCF_HT_FLAG)
          return nil if ret < 0

          ret == 1
        when :string, :str
          # The destination slot already holds the character buffer itself;
          # dereferencing it once more would treat string bytes as an address.
          info_values.call(LibHTS::BCF_HT_STR)
                     .read_string
        end
      end
    end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
  class Bcf
    # A single VCF/BCF record: wraps the record struct handed over by
    # HTS::Bcf together with the Bcf object it was read from.
    class Record
      # @param bcf_t record struct (indexed with symbols such as :pos, :d)
      # @param bcf [HTS::Bcf] the file object this record belongs to
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        # Pointer-sized slot reused by Info#get as the destination argument.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return the wrapped record struct
      def struct
        @bcf1
      end

      # Pointer to the wrapped record struct. This used to return the
      # pointer of the owning Bcf file, which is not the record; it now
      # mirrors #struct.
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      # Not implemented yet.
      def formats; end

      # Not implemented yet.
      def genotypes; end

      # Name of the contig the record lies on.
      #
      # @return [String, nil] nil when the header is null or the record's
      #   :rid is outside the header's contig table
      def chrom
        hdr = @bcf.header.struct
        rid = @bcf1[:rid]

        return nil if hdr.null? || rid < 0 || rid >= hdr[:n][LibHTS::BCF_DT_CTG]

        LibHTS::BcfIdpair.new(
          hdr[:id][LibHTS::BCF_DT_CTG].to_ptr +
          LibHTS::BcfIdpair.size * rid # offset
        )[:key]
      end

      # @return [Integer] the struct's :pos plus one
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the struct's :pos, unmodified
      def start
        @bcf1[:pos]
      end

      # @return [Integer] :pos plus :rlen
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return the :id member of the unpacked record
      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # FILTER column of the record.
      #
      # @return [String, Array<String>] "PASS" when no filter id is stored,
      #   the filter name when there is exactly one, otherwise an array
      #   with one name per filter id
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]

        case n_flt
        when 0
          "PASS"
        when 1
          filter_name(d[:flt].read_int)
        else
          # Previously matched `2` only, so records carrying three or more
          # filters fell through the case and produced nil.
          d[:flt].get_array_of_int(0, n_flt).map { |i| filter_name(i) }
        end
      end

      # @return the :qual member of the record struct
      def qual
        @bcf1[:qual]
      end

      # @return [String] the first allele
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
        ).map(&:read_string)
      end

      # @return [Array<String>] all alleles, the first one included
      def alleles
        # Unpack first, exactly like #ref and #alt do; without it the allele
        # array is only populated if another accessor happened to run before.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] accessor for this record's INFO fields
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] accessor for this record's FORMAT fields
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Text form of the record as produced by LibHTS.vcf_format.
      #
      # @raise [RuntimeError] when vcf_format returns -1
      def to_s
        ksr = LibHTS::KString.new
        if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
          raise "Failed to format record"
        end
        # NOTE(review): the buffer filled in by vcf_format is not released
        # here; confirm who owns it.
        ksr[:s]
      end

      private

      # Translate a numeric filter id into its name via the header dictionary.
      def filter_name(flt_id)
        LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, flt_id)
      end
    end
  end
end
|
data/lib/hts/bcf.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
require_relative "bcf/header"
|
7
|
+
require_relative "bcf/record"
|
8
|
+
require_relative "bcf/info"
|
9
|
+
require_relative "bcf/format"
|
10
|
+
require_relative "utils/open_method"
|
11
|
+
|
12
|
+
module HTS
  # Reader for VCF/BCF files built on the LibHTS bindings.
  class Bcf
    include Enumerable
    extend Utils::OpenMethod

    attr_reader :file_path, :mode, :header
    # File handle returned by LibHTS.hts_open.
    attr_reader :htf_file

    class << self
      alias open new
    end

    # Open a VCF/BCF file and read its header.
    #
    # When a block is given the new object is yielded to it and the file is
    # closed once the block finishes, including when it raises.
    #
    # @param file_path [String] path to the file; expanded with File.expand_path
    # @param mode [String] mode string passed to LibHTS.hts_open
    # @raise [RuntimeError] if the file does not exist or cannot be opened
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)

      unless File.exist?(file_path)
        message = "No such VCF/BCF file - #{file_path}"
        raise message
      end

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      # Reading the header from a handle that failed to open would hand a
      # NULL pointer to the C library, so stop here instead.
      if @htf_file.nil? || @htf_file.null?
        raise "Failed to open VCF/BCF file - #{file_path}"
      end

      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # FIXME: should be defined here?
      @bcf1 = LibHTS.bcf_init

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return the file handle returned by LibHTS.hts_open
    def struct
      htf_file
    end

    # @return pointer of the file handle
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Yield one Record per record read from the file.
    #
    # Every yielded Record wraps the same underlying record struct, which is
    # overwritten by the next read.
    #
    # @return [Enumerator] when called without a block
    # @raise [RuntimeError] when bcf_read reports a code below -1
    def each(&block)
      # Without this guard a block-less call died on `nil.call`.
      return to_enum(__method__) unless block

      # Stop on any negative code. The previous `!= -1` test kept iterating
      # when bcf_read reported a critical error (a code below -1).
      while (status = LibHTS.bcf_read(htf_file, header, @bcf1)) >= 0
        block.call(Record.new(@bcf1, self))
      end

      raise "Failed to read VCF/BCF record (bcf_read returned #{status}) - #{file_path}" if status < -1
    end

    # @return number of samples reported by LibHTS.bcf_hdr_nsamples
    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
|
data/lib/hts/faidx.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
require_relative "utils/open_method"
|
7
|
+
|
8
|
+
module HTS
  # Access to a FASTA index through the LibHTS faidx bindings.
  class Faidx
    extend Utils::OpenMethod

    attr_reader :file_path

    # Load the index for a FASTA file.
    #
    # @param file_path [String] path handed to LibHTS.fai_load
    # @raise [RuntimeError] if the index cannot be loaded
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      # NOTE(review): fai_load still receives the path exactly as given,
      # while #file_path reports the expanded form; confirm which is intended.
      @fai = LibHTS.fai_load(file_path)
      # Every other method passes @fai straight to the C library, so a
      # failed load has to be reported here rather than dereferenced later.
      raise "Failed to load FASTA index - #{file_path}" if @fai.nil? || @fai.null?
    end

    # Release the loaded index.
    def close
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name
    # @return [Integer, nil] nil when LibHTS.faidx_seq_len returns -1
    # @raise [ArgumentError] if +chrom+ is neither a String nor a Symbol
    def chrom_size(chrom)
      unless chrom.is_a?(String) || chrom.is_a?(Symbol)
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end

      chrom = chrom.to_s
      result = LibHTS.faidx_seq_len(@fai, chrom)
      result == -1 ? nil : result
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    # Not implemented yet.
    def cget; end

    # FIXME: naming and syntax
    # Not implemented yet.
    def get; end

    # __iter__
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ffi/bit_struct"
|
4
|
+
|
5
|
+
module FFI
  # Helpers for declaring nested anonymous unions and structs inline inside
  # a +layout+ call.
  class Struct
    # Build an anonymous FFI::Union subclass with the given layout.
    #
    # @example HtsOpt
    #   class HtsOpt < FFI::Struct
    #     layout \
    #       :arg,    :string,
    #       :opt,    HtsFmtOption,
    #       :val,
    #       union_layout(
    #         :i,    :int,
    #         :s,    :string
    #       ),
    #       :next,   HtsOpt.ptr
    #   end
    #
    # @param args layout specification, forwarded unchanged to +layout+
    # @return [Class] the new anonymous union class
    def self.union_layout(*args)
      Class.new(FFI::Union).tap { |anonymous| anonymous.layout(*args) }
    end

    # Build an anonymous FFI::Struct subclass with the given layout.
    #
    # @example HtsFormat
    #   class HtsFormat < FFI::Struct
    #     layout \
    #       :category,          HtsFormatCategory,
    #       :format,            HtsExactFormat,
    #       :version,
    #       struct_layout(
    #         :major,           :short,
    #         :minor,           :short
    #       ),
    #       :compression,       HtsCompression,
    #       :compression_level, :short,
    #       :specific,          :pointer
    #   end
    #
    # @param args layout specification, forwarded unchanged to +layout+
    # @return [Class] the new anonymous struct class
    def self.struct_layout(*args)
      Class.new(FFI::Struct).tap { |anonymous| anonymous.layout(*args) }
    end
  end
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# BGZF
|
4
4
|
module HTS
|
5
|
-
module
|
5
|
+
module LibHTS
|
6
6
|
# Open an existing file descriptor for reading or writing.
|
7
7
|
attach_function \
|
8
8
|
:bgzf_dopen,
|
@@ -120,7 +120,7 @@ module HTS
|
|
120
120
|
# Read one line from a BGZF file. It is faster than bgzf_getc()
|
121
121
|
attach_function \
|
122
122
|
:bgzf_getline,
|
123
|
-
[BGZF, :int,
|
123
|
+
[BGZF, :int, KString],
|
124
124
|
:int
|
125
125
|
|
126
126
|
# Read the next BGZF block.
|