htslib 0.0.2 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
4
+ # This is a port from Nim.
5
+ # TODO: Make it more like Ruby.
6
+
7
module HTS
  class Bcf
    # Reader for the FORMAT values of a single record.
    class Format
      # @param record [HTS::Bcf::Record] record whose FORMAT values are read
      def initialize(record)
        @record = record
        # Destination slot passed to bcf_get_format_values; it lives on the
        # instance so every #get call reuses the same slot.
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      # Fetch the values stored under a FORMAT key.
      #
      # @param key [String] FORMAT tag name
      # @param type [Symbol, String] :int / :int32, :float / :real, :flag,
      #   or :string / :str
      # @return [Array<Integer>, Array<Float>, Boolean, nil] the decoded values;
      #   nil when bcf_get_format_values returns a negative status or when
      #   +type+ is not one of the names listed above
      # @raise [ArgumentError] when +type+ is nil (there is no type detection
      #   here, so the caller has to say how the values should be decoded)
      # @raise [NotImplementedError] for :string / :str
      def get(key, type = nil)
        raise ArgumentError, "type is required (:int, :float, :flag or :string)" if type.nil?

        case type.to_sym
        when :int, :int32
          format_values(key, LibHTS::BCF_HT_INT) { |values, count| values.read_array_of_int32(count) }
        when :float, :real
          format_values(key, LibHTS::BCF_HT_REAL) { |values, count| values.read_array_of_float(count) }
        when :flag
          format_values(key, LibHTS::BCF_HT_FLAG) { |values, _count| values.read_int == 1 }
        when :string, :str
          raise NotImplementedError, "String type not implemented yet."
        end
      end

      def set; end

      # def fields # iterator
      # end

      def genotypes; end

      private

      # Calls bcf_get_format_values for +key+ and, on success, yields the
      # filled buffer together with the element count written to the counter.
      #
      # @return [Object, nil] the block's value, or nil on a negative status
      def format_values(key, ht_type)
        count = FFI::MemoryPointer.new(:int)
        status = LibHTS.bcf_get_format_values(
          @record.bcf.header.struct, @record.struct, key, @p1, count, ht_type
        )
        return nil if status.negative?

        yield @p1.read_pointer, count.read_int
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Wrapper around the header object handed to the constructor.
    class Header
      # @param bcf_hdr [Object] header struct (Bcf passes the result of
      #   LibHTS.bcf_hdr_read)
      def initialize(bcf_hdr)
        @bcf_hdr = bcf_hdr
      end

      # @return [Object] the wrapped header struct
      def struct
        @bcf_hdr
      end

      # @return [Object] whatever the wrapped struct's #to_ptr returns
      def to_ptr
        @bcf_hdr.to_ptr
      end

      # Render the header as text via LibHTS.bcf_hdr_format.
      #
      # @return [String] the :s member of the filled KString
      # @raise [RuntimeError] when bcf_hdr_format reports failure
      def to_s
        kstr = LibHTS::KString.new
        # The status is an Integer and every Integer is truthy in Ruby, so it
        # has to be compared explicitly (same convention as Record#to_s).
        raise "Failed to get header string" if LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr) == -1

        kstr[:s]
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Reader for the INFO fields of a single record.
    class Info
      # @param record [HTS::Bcf::Record] record whose INFO fields are read
      def initialize(record)
        @record = record
      end

      # Fetch the value(s) stored under an INFO key.
      #
      # @note Specify the type. If you don't specify a type, it will still
      #   work, but it will be slower (the record's INFO entries are scanned
      #   to find the stored type).
      # @param key [String] INFO tag name
      # @param type [Symbol, String, nil] :int / :int32, :float / :real,
      #   :flag / :bool, :string / :str, or nil to detect it from the record
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] decoded
      #   value; nil when the key is not found in the record, when htslib
      #   returns a negative status, or when +type+ is not recognised
      # @raise [RuntimeError] for an unknown stored type code or an
      #   unexpected status from bcf_get_info_flag
      def get(key, type = nil)
        if type.nil?
          stored = get_info_type(key)
          # Key absent from this record: nothing to decode.
          return nil if stored.nil?

          type = info_type_to_string(stored)
        end

        case type.to_sym
        when :int, :int32
          info_values(key, LibHTS::BCF_HT_INT) { |values, count| values.read_array_of_int32(count) }
        when :float, :real
          info_values(key, LibHTS::BCF_HT_REAL) { |values, count| values.read_array_of_float(count) }
        when :flag, :bool
          info_flag(key)
        when :string, :str
          info_values(key, LibHTS::BCF_HT_STR) { |values, _count| values.read_string }
        end
      end

      # Describe every INFO entry of the record.
      #
      # FIXME: naming? room for improvement.
      #
      # @return [Array<Hash>] one hash per entry with the keys :name, :n,
      #   :vtype and :i
      def fields
        header = @record.bcf.header.struct
        each_info_field.map do |fld|
          {
            name: LibHTS.bcf_hdr_int2id(header, LibHTS::BCF_DT_ID, fld[:key]),
            n: LibHTS.bcf_hdr_id2number(header, LibHTS::BCF_HL_INFO, fld[:key]),
            vtype: fld[:type],
            i: fld[:key]
          }
        end
      end

      private

      # Yields a LibHTS::BcfInfo view for each of the record's n_info entries,
      # stepping through the :info array by BcfInfo.size.
      def each_info_field
        return to_enum(__method__) unless block_given?

        base = @record.struct[:d][:info]
        @record.struct[:n_info].times do |i|
          yield LibHTS::BcfInfo.new(base + i * LibHTS::BcfInfo.size)
        end
      end

      # Calls bcf_get_info_values for +key+ and, on success, yields the filled
      # buffer together with the count written to the counter.
      #
      # @return [Object, nil] the block's value, or nil on a negative status
      def info_values(key, ht_type)
        count = FFI::MemoryPointer.new(:int)
        dst = @record.p1
        status = LibHTS.bcf_get_info_values(
          @record.bcf.header.struct, @record.struct, key, dst, count, ht_type
        )
        return nil if status.negative?

        yield dst.read_pointer, count.read_int
      end

      # Maps the status of bcf_get_info_flag to true / false / nil.
      def info_flag(key)
        count = FFI::MemoryPointer.new(:int)
        ret = LibHTS.bcf_get_info_flag(
          @record.bcf.header.struct, @record.struct, key, @record.p1, count
        )
        case ret
        when 1 then true
        when 0 then false
        when -1 then nil
        else
          raise "Unknown return value from bcf_get_info_flag: #{ret}"
        end
      end

      # @return [Integer, nil] the :type code of the entry whose header id
      #   equals +key+, or nil when no entry matches
      def get_info_type(key)
        header = @record.bcf.header.struct
        match = each_info_field.find do |fld|
          LibHTS.bcf_hdr_int2id(header, LibHTS::BCF_DT_ID, fld[:key]) == key
        end
        match[:type] if match
      end

      # Translate a stored type code into the type name used by #get.
      def info_type_to_string(t)
        case t
        when 0 then :flag
        when 1, 2, 3, 4 then :int
        when 5 then :float
        when 7 then :string
        else
          raise "Unknown info type: #{t}"
        end
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # One record read from a Bcf, wrapping the struct filled by bcf_read.
    class Record
      # @param bcf_t [Object] record struct (Bcf#each passes LibHTS.bcf_init's result)
      # @param bcf [HTS::Bcf] the reader this record came from; used to reach the header
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        # Scratch destination slot shared with Info (see Info#get).
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return [Object] the wrapped record struct
      def struct
        @bcf1
      end

      # @return [Object] whatever the wrapped struct's #to_ptr returns
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      def formats; end

      def genotypes; end

      # @return [String] name that bcf_hdr_id2name gives for this record's :rid
      def chrom
        LibHTS.bcf_hdr_id2name(header_struct, @bcf1[:rid])
      end

      # @return [Integer] the :pos field plus one
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the :pos field as stored
      def start
        @bcf1[:pos]
      end

      # @return [Integer] :pos plus :rlen
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return [String] the :id member of the unpacked record
      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # Names of the filters attached to the record.
      #
      # @return [String, Array<String>] "PASS" when there are no filters, the
      #   filter name when there is exactly one, an array of names otherwise
      # @raise [RuntimeError] when the stored filter count is negative
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]
        raise "Unexpected number of filters. n_flt: #{n_flt}" if n_flt.negative?

        case n_flt
        when 0
          "PASS"
        when 1
          filter_name(d[:flt].read_int)
        else
          # Any count above one is returned as a list, not only exactly two.
          d[:flt].get_array_of_int(0, n_flt).map { |i| filter_name(i) }
        end
      end

      # @return [Float] the :qual field as stored
      def qual
        @bcf1[:qual]
      end

      # @return [String] first entry of the allele array
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first one
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        # Skip one pointer-sized slot so the first (reference) entry is left out.
        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
        ).map(&:read_string)
      end

      # @return [Array<String>] all :n_allele entries of the allele array
      def alleles
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] INFO reader bound to this record
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] FORMAT reader bound to this record
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # @return [String] the record rendered by LibHTS.vcf_format
      # @raise [RuntimeError] when vcf_format returns -1
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(header_struct, @bcf1, ksr) == -1

        ksr[:s]
      end

      private

      # Header struct of the owning reader.
      def header_struct
        @bcf.header.struct
      end

      # Look up the header id string for a filter index.
      def filter_name(index)
        LibHTS.bcf_hdr_int2id(header_struct, LibHTS::BCF_DT_ID, index)
      end
    end
  end
end
data/lib/hts/bcf.rb ADDED
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "bcf/header"
7
+ require_relative "bcf/info"
8
+ require_relative "bcf/format"
9
+ require_relative "bcf/record"
10
+
11
module HTS
  # Reader for a VCF/BCF file opened through LibHTS.hts_open.
  class Bcf
    include Enumerable

    attr_reader :file_path, :mode, :header
    # Handle returned by LibHTS.hts_open (original note: HtfFile is FFI::BitStruct).
    attr_reader :htf_file

    class << self
      alias open new
    end

    # Open +file_path+ and read its header. With a block, the reader is
    # yielded and closed when the block finishes (IO-like API).
    #
    # @param file_path [String] path to the file; expanded with File.expand_path
    # @param mode [String] mode string forwarded to LibHTS.hts_open
    # @raise [RuntimeError] when the expanded path does not exist
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)

      unless File.exist?(file_path)
        message = "No such VCF/BCF file - #{file_path}"
        raise message
      end

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # IO like API
      if block_given?
        begin
          yield self
        ensure
          close
        end
      end
    end

    # @return [Object] the handle returned by hts_open
    def struct
      htf_file
    end

    # @return [Object] whatever the handle's #to_ptr returns
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Iterate over the records of the file.
    #
    # A fresh record struct is requested from LibHTS.bcf_init for every read,
    # so each yielded Record wraps its own struct.
    #
    # @yieldparam record [HTS::Bcf::Record]
    # @return [Enumerator] when no block is given
    # @return [self] after the last record
    # @raise [RuntimeError] when bcf_read returns a status below -1
    def each
      return to_enum(__method__) unless block_given?

      # Per htslib, bcf_read returns -1 at end of input and a lower value on
      # a read error; stop on either instead of yielding a half-filled record.
      while (status = LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init)) >= 0
        yield Record.new(bcf1, self)
      end
      raise "Failed to read record from #{file_path} (bcf_read returned #{status})" if status < -1

      self
    end

    # @return [Integer] result of LibHTS.bcf_hdr_nsamples for the header
    def sample_count
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
data/lib/hts/faidx.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
module HTS
  # Access to a FASTA index loaded through LibHTS.fai_load.
  class Faidx
    attr_reader :file_path

    class << self
      alias open new
    end

    # Load the index for +file_path+. With a block, the instance is yielded
    # and closed when the block finishes (IO-like API).
    #
    # @param file_path [String] path handed to LibHTS.fai_load
    # @raise [RuntimeError] when fai_load gives back nil or a null handle
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      # NOTE(review): fai_load receives the path as given, not the expanded
      # @file_path - confirm this is intended.
      @fai = LibHTS.fai_load(file_path)

      # Fail here rather than inside a later faidx_* call on an unusable handle.
      if @fai.nil? || (@fai.respond_to?(:null?) && @fai.null?)
        raise "Failed to load FASTA index - #{file_path}"
      end

      # IO like API
      if block_given?
        begin
          yield self
        ensure
          close
        end
      end
    end

    # Release the index handle via LibHTS.fai_destroy.
    def close
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name
    # @return [Integer, nil] the length, or nil when faidx_seq_len returns -1
    # @raise [ArgumentError] when +chrom+ is neither a String nor a Symbol
    def chrom_size(chrom)
      unless chrom.is_a?(String) || chrom.is_a?(Symbol)
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end

      chrom = chrom.to_s
      result = LibHTS.faidx_seq_len(@fai, chrom)
      result == -1 ? nil : result
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    def cget; end

    # FIXME: naming and syntax
    def get; end

    # __iter__
  end
end
@@ -0,0 +1,8 @@
1
+ # Ruby-FFI extensions
2
+
3
+ * Add syntactic sugar
4
+ * union_layout
5
+ * struct_layout
6
+
7
+ * Support for bit fields
8
+ * [ffi-bitfield](https://github.com/kojix2/ffi-bitfield)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ffi/bit_field"
4
+
5
module FFI
  class Struct
    class << self
      # Build an anonymous FFI::Union subclass with the given layout, so a
      # union member can be declared inline inside another layout.
      #
      # @example HtsOpt
      #   class HtsOpt < FFI::Struct
      #     layout \
      #       :arg,     :string,
      #       :opt,     HtsFmtOption,
      #       :val,
      #       union_layout(
      #         :i,     :int,
      #         :s,     :string
      #       ),
      #       :next,    HtsOpt.ptr
      #   end
      #
      # @param args layout specification, forwarded unchanged to +layout+
      # @return [Class] the new FFI::Union subclass
      def union_layout(*args)
        anonymous_union = Class.new(FFI::Union)
        anonymous_union.layout(*args)
        anonymous_union
      end

      # Build an anonymous FFI::Struct subclass with the given layout, so a
      # nested struct member can be declared inline inside another layout.
      #
      # @example HtsFormat
      #   class HtsFormat < FFI::Struct
      #     layout \
      #       :category,          HtsFormatCategory,
      #       :format,            HtsExactFormat,
      #       :version,
      #       struct_layout(
      #         :major,           :short,
      #         :minor,           :short
      #       ),
      #       :compression,       HtsCompression,
      #       :compression_level, :short,
      #       :specific,          :pointer
      #   end
      #
      # @param args layout specification, forwarded unchanged to +layout+
      # @return [Class] the new FFI::Struct subclass
      def struct_layout(*args)
        anonymous_struct = Class.new(FFI::Struct)
        anonymous_struct.layout(*args)
        anonymous_struct
      end
    end
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # BGZF
4
4
  module HTS
5
- module FFI
5
+ module LibHTS
6
6
  # Open an existing file descriptor for reading or writing.
7
7
  attach_function \
8
8
  :bgzf_dopen,