htslib 0.0.2 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
4
+ # This is a port from Nim.
5
+ # TODO: Make it more like Ruby.
6
+
7
module HTS
  class Bcf
    # Reader for the FORMAT values of one record (port of hts-nim's vcf.nim).
    class Format
      # @param record [#struct, #bcf] the record whose FORMAT values are read.
      def initialize(record)
        @record = record
        # Out-parameter passed to bcf_get_format_values; read back with
        # #read_pointer after a successful call.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      # Fetch the values stored under a FORMAT key.
      #
      # @param key [String] FORMAT tag to look up.
      # @param type [Symbol, String] :int / :int32, :float / :real or :flag.
      #   Unlike Info#get the type cannot be inferred here, so it is required
      #   even though the parameter keeps its historical nil default.
      # @return [Array<Integer>, Array<Float>, Boolean, nil] decoded values;
      #   nil when bcf_get_format_values reports a negative status or when
      #   +type+ names none of the supported types.
      # @raise [ArgumentError] if +type+ is nil.
      # @raise [NotImplementedError] for :string / :str.
      def get(key, type = nil)
        # Previously a nil type crashed with NoMethodError on nil.to_sym.
        raise ArgumentError, "type is required (:int, :float or :flag)" if type.nil?

        n = FFI::MemoryPointer.new(:int)
        p1 = @p1
        h = @record.bcf.header.struct
        r = @record.struct

        # Deliberately a proc, not a lambda: `return` leaves #get itself.
        format_values = proc do |ht_type|
          ret = LibHTS.bcf_get_format_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        case type.to_sym
        when :int, :int32
          format_values.call(LibHTS::BCF_HT_INT)
                       .read_array_of_int32(n.read_int)
        when :float, :real
          format_values.call(LibHTS::BCF_HT_REAL)
                       .read_array_of_float(n.read_int)
        when :flag
          format_values.call(LibHTS::BCF_HT_FLAG)
                       .read_int == 1
        when :string, :str
          raise NotImplementedError, "String type not implemented yet."
        end
      end

      # Not implemented yet.
      def set; end

      # def fields # iterator
      # end

      # Not implemented yet.
      def genotypes; end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Wrapper around the native header object handed to the constructor.
    class Header
      # @param bcf_hdr [Object] native header struct (as returned by
      #   LibHTS.bcf_hdr_read in HTS::Bcf).
      def initialize(bcf_hdr)
        @bcf_hdr = bcf_hdr
      end

      # @return [Object] the wrapped native header struct.
      def struct
        @bcf_hdr
      end

      # @return [Object] whatever the wrapped struct's #to_ptr returns, so a
      #   Header can be passed directly to native calls.
      def to_ptr
        @bcf_hdr.to_ptr
      end

      # Render the header as text through LibHTS.bcf_hdr_format.
      #
      # @return [String] the formatted header.
      # @raise [RuntimeError] if bcf_hdr_format reports a negative status.
      def to_s
        kstr = LibHTS::KString.new
        # The status is an Integer and every Integer is truthy in Ruby, so the
        # former `raise ... unless status` could never fire; compare instead.
        status = LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
        raise "Failed to get header string" if status.negative?

        kstr[:s]
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Reader for the INFO fields of one record.
    class Info
      # @param record [#struct, #bcf, #p1] the record whose INFO is read.
      def initialize(record)
        @record = record
      end

      # Fetch the value(s) stored under an INFO key.
      #
      # @note Specify the type. If you don't specify a type, it will still
      #   work, but it will be slower (the record's INFO array is scanned to
      #   discover the type).
      # @param key [String] INFO tag to look up.
      # @param type [Symbol, String, nil] :int / :int32, :float / :real,
      #   :flag / :bool or :string / :str; inferred from the record when nil.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] decoded
      #   value; nil when the key is absent from the record, when the native
      #   call reports a negative status, or when +type+ is not recognised.
      # @raise [RuntimeError] on an unknown stored type code or an unexpected
      #   bcf_get_info_flag status.
      def get(key, type = nil)
        unless type
          raw_type = get_info_type(key)
          # Key is not among this record's INFO fields: nothing to decode.
          return nil if raw_type.nil?

          type = info_type_to_string(raw_type)
        end

        n = FFI::MemoryPointer.new(:int)
        p1 = @record.p1
        h = @record.bcf.header.struct
        r = @record.struct

        # Deliberately a proc, not a lambda: `return` leaves #get itself.
        info_values = proc do |ht_type|
          ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, ht_type)
          return nil if ret < 0 # return from method.

          p1.read_pointer
        end

        case type.to_sym
        when :int, :int32
          info_values.call(LibHTS::BCF_HT_INT)
                     .read_array_of_int32(n.read_int)
        when :float, :real
          info_values.call(LibHTS::BCF_HT_REAL)
                     .read_array_of_float(n.read_int)
        when :flag, :bool
          case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
          when 1 then true
          when 0 then false
          when -1 then nil
          else
            raise "Unknown return value from bcf_get_info_flag: #{ret}"
          end
        when :string, :str
          info_values.call(LibHTS::BCF_HT_STR)
                     .read_string
        end
      end

      # Describe every INFO field of the record.
      #
      # FIXME: naming? room for improvement.
      # @return [Array<Hash>] one hash per field with :name (from
      #   bcf_hdr_int2id), :n (from bcf_hdr_id2number), :vtype (the field's
      #   stored type code) and :i (the field's key id).
      def fields
        hdr = @record.bcf.header.struct
        info_structs.map do |fld|
          {
            name: LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]),
            n: LibHTS.bcf_hdr_id2number(hdr, LibHTS::BCF_HL_INFO, fld[:key]),
            vtype: fld[:type], i: fld[:key]
          }
        end
      end

      private

      # Wrap each element of the record's native INFO array in a BcfInfo.
      def info_structs
        base = @record.struct[:d][:info]
        Array.new(@record.struct[:n_info]) do |i|
          LibHTS::BcfInfo.new(base + i * LibHTS::BcfInfo.size)
        end
      end

      # Stored type code of the INFO field named +key+, or nil when the record
      # has no such field. (The previous `times` loop fell through to its own
      # return value, n_info, which callers then decoded as a type code.)
      def get_info_type(key)
        hdr = @record.bcf.header.struct
        match = info_structs.find do |fld|
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]) == key
        end
        match && match[:type]
      end

      # Map a stored type code to the symbol understood by #get.
      def info_type_to_string(t)
        case t
        when 0 then :flag
        when 1, 2, 3, 4 then :int
        when 5 then :float
        when 7 then :string
        else
          raise "Unknown info type: #{t}"
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # One VCF/BCF record: wraps the native record struct together with the
    # Bcf it was read from (needed for header lookups).
    class Record
      # @param bcf_t [Object] native record struct.
      # @param bcf [HTS::Bcf] the reader that produced the record.
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        # Scratch out-pointer shared with Info#get.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return [Object] the wrapped native record struct.
      def struct
        @bcf1
      end

      # @return [Object] whatever the wrapped struct's #to_ptr returns.
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      # Not implemented yet.
      def formats; end

      # Not implemented yet.
      def genotypes; end

      # @return [String] name resolved from the record's :rid through
      #   bcf_hdr_id2name.
      def chrom
        hdr = @bcf.header.struct
        rid = @bcf1[:rid]

        LibHTS.bcf_hdr_id2name(hdr, rid)
      end

      # @return [Integer] the stored :pos plus one (presumably the 1-based
      #   position; see FIXME).
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the stored :pos, unchanged.
      def start
        @bcf1[:pos]
      end

      # @return [Integer] stored :pos plus :rlen.
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return [Object] the :id member of the unpacked record data.
      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # Names of the filters applied to the record.
      #
      # @return [String, Array<String>] "PASS" when no filter is recorded, the
      #   single filter name when there is exactly one, otherwise an array of
      #   names (any count of two or more; previously three or more raised).
      # @raise [RuntimeError] if the stored filter count is negative.
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]
        hdr = @bcf.header.struct

        case n_flt
        when 0
          "PASS"
        when 1
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, d[:flt].read_int)
        else
          raise "Unexpected number of filters. n_flt: #{n_flt}" if n_flt.negative?

          d[:flt].get_array_of_int(0, n_flt).map do |i|
            LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, i)
          end
        end
      end

      # @return [Object] the record's :qual member.
      def qual
        @bcf1[:qual]
      end

      # @return [String] first entry of the allele array.
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first one.
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        # Skip one pointer-sized slot (the first allele) and read the rest.
        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
        ).map(&:read_string)
      end

      # @return [Array<String>] all :n_allele alleles, first one included.
      def alleles
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] INFO accessor for this record.
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] FORMAT accessor for this record.
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Render the record as text through LibHTS.vcf_format.
      #
      # @return [String] the formatted record.
      # @raise [RuntimeError] if vcf_format returns -1.
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1

        ksr[:s]
      end
    end
  end
end
data/lib/hts/bcf.rb ADDED
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "bcf/header"
7
+ require_relative "bcf/info"
8
+ require_relative "bcf/format"
9
+ require_relative "bcf/record"
10
+
11
module HTS
  # Reader for a VCF/BCF file; enumerates HTS::Bcf::Record objects.
  class Bcf
    include Enumerable

    attr_reader :file_path, :mode, :header
    # Native file handle returned by LibHTS.hts_open.
    attr_reader :htf_file

    class << self
      alias open new
    end

    # Open a file and read its header.
    #
    # @param file_path [String] path to the file; expanded before use.
    # @param mode [String] mode string forwarded to LibHTS.hts_open.
    # @yield [HTS::Bcf] when a block is given the reader is yielded and then
    #   closed once the block finishes, even if it raises.
    # @raise [RuntimeError] if the expanded path does not exist.
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)
      raise "No such VCF/BCF file - #{file_path}" unless File.exist?(file_path)

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return [Object] the native file handle.
    def struct
      htf_file
    end

    # @return [Object] whatever the native handle's #to_ptr returns.
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Iterate over the records of the file.
    #
    # A fresh native record is allocated per iteration so yielded records
    # stay independent of each other.
    #
    # @yield [HTS::Bcf::Record]
    # @return [Enumerator, self] an Enumerator without a block, else self.
    # @raise [RuntimeError] if bcf_read reports a status below -1. The old
    #   `!= -1` loop treated such a status as a successful read.
    def each
      return to_enum(__method__) unless block_given?

      # -1 is the only status that ends the iteration normally.
      until (status = LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init)) == -1
        raise "Failed to read record (bcf_read returned #{status})" if status < -1

        yield Record.new(bcf1, self)
      end
      self
    end

    # @return [Integer] result of bcf_hdr_nsamples for the header.
    def sample_count
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
data/lib/hts/faidx.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
module HTS
  # Accessor for a FASTA index loaded through LibHTS.fai_load.
  class Faidx
    attr_reader :file_path

    class << self
      alias open new
    end

    # Load the index for a FASTA file.
    #
    # @param file_path [String] path to the FASTA file; expanded before use.
    # @yield [HTS::Faidx] when a block is given the object is yielded and then
    #   closed once the block finishes, even if it raises.
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      # Load the expanded path so "~"-style input works and the loaded file
      # always matches what #file_path reports.
      @fai = LibHTS.fai_load(@file_path)

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # Release the loaded index.
    def close
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name.
    # @return [Integer, nil] the length, or nil when faidx_seq_len returns -1.
    # @raise [ArgumentError] if +chrom+ is neither a String nor a Symbol.
    def chrom_size(chrom)
      unless chrom.is_a?(String) || chrom.is_a?(Symbol)
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end

      chrom = chrom.to_s
      result = LibHTS.faidx_seq_len(@fai, chrom)
      result == -1 ? nil : result
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    def cget; end

    # FIXME: naming and syntax
    def get; end

    # __iter__
  end
end
@@ -0,0 +1,8 @@
1
+ # Ruby-FFI extensions
2
+
3
+ * Add syntactic sugar
4
+ * union_layout
5
+ * struct_layout
6
+
7
+ * Support for bit fields
8
+ * [ffi-bitfield](https://github.com/kojix2/ffi-bitfield)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ffi/bit_field"
4
+
5
module FFI
  class Struct
    class << self
      # Build an anonymous FFI::Union subclass with the given layout, so a
      # nested union can be declared inline inside another layout.
      #
      # @example HtsOpt
      #   class HtsOpt < FFI::Struct
      #     layout \
      #       :arg,    :string,
      #       :opt,    HtsFmtOption,
      #       :val,
      #       union_layout(
      #         :i,    :int,
      #         :s,    :string
      #       ),
      #       :next,   HtsOpt.ptr
      #   end
      #
      # @param args layout arguments, forwarded unchanged to +layout+.
      # @return [Class] the new anonymous union class.
      def union_layout(*args)
        anonymous_union = Class.new(FFI::Union)
        anonymous_union.layout(*args)
        anonymous_union
      end

      # Build an anonymous FFI::Struct subclass with the given layout, so a
      # nested struct can be declared inline inside another layout.
      #
      # @example HtsFormat
      #   class HtsFormat < FFI::Struct
      #     layout \
      #       :category,          HtsFormatCategory,
      #       :format,            HtsExactFormat,
      #       :version,
      #       struct_layout(
      #         :major,           :short,
      #         :minor,           :short
      #       ),
      #       :compression,       HtsCompression,
      #       :compression_level, :short,
      #       :specific,          :pointer
      #   end
      #
      # @param args layout arguments, forwarded unchanged to +layout+.
      # @return [Class] the new anonymous struct class.
      def struct_layout(*args)
        anonymous_struct = Class.new(FFI::Struct)
        anonymous_struct.layout(*args)
        anonymous_struct
      end
    end
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # BGZF
4
4
  module HTS
5
- module FFI
5
+ module LibHTS
6
6
  # Open an existing file descriptor for reading or writing.
7
7
  attach_function \
8
8
  :bgzf_dopen,