htslib 0.0.1 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ # https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
4
+ # This is a port from Nim.
5
+ # TODO: Make it more like Ruby.
6
+
7
module HTS
  class Bcf
    # Reader for the FORMAT (per-sample) fields of a single record.
    #
    # Ported from hts-nim (src/hts/vcf.nim).
    class Format
      # @param record [Object] the owning record; must respond to +struct+
      #   (the record struct) and +bcf+ (the owning file, which exposes
      #   +header.struct+).
      def initialize(record)
        @record = record
        # Destination-pointer slot handed to bcf_get_format_values on every
        # call. FIXME: naming
        @p1 = FFI::MemoryPointer.new(:pointer)
      end

      # Fetch the values stored under a FORMAT tag.
      #
      # @param key [String] FORMAT tag name.
      # @param type [Symbol, String] one of :int/:int32, :float/:real, :flag,
      #   :string/:str. The parameter keeps its +nil+ default for interface
      #   compatibility, but the type cannot be inferred here and must be given.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] the decoded
      #   values; nil when htslib reports a negative status for the tag or when
      #   +type+ is not one of the recognised names.
      # @raise [ArgumentError] if +type+ is nil (previously this surfaced as a
      #   NoMethodError from +nil.to_sym+).
      def get(key, type = nil)
        raise ArgumentError, "type is required to read FORMAT field #{key.inspect}" if type.nil?

        case type.to_sym
        when :int, :int32
          read_values(key, LibHTS::BCF_HT_INT) { |buf, len| buf.read_array_of_int32(len) }
        when :float, :real
          read_values(key, LibHTS::BCF_HT_REAL) { |buf, len| buf.read_array_of_float(len) }
        when :flag
          read_values(key, LibHTS::BCF_HT_FLAG) { |buf, _len| buf.read_int == 1 }
        when :string, :str
          # NOTE(review): this dereferences the buffer a second time before
          # reading the string; confirm against the layout htslib uses for
          # BCF_HT_STR in bcf_get_format_values.
          read_values(key, LibHTS::BCF_HT_STR) { |buf, _len| buf.read_pointer.read_string }
        end
      end

      def set; end

      # def fields # iterator
      # end

      def genotypes; end

      private

      # Calls bcf_get_format_values for +key+ and yields the destination buffer
      # together with the element count reported through the size out-parameter.
      #
      # @return [Object, nil] the block's value, or nil when the call reports a
      #   negative status.
      def read_values(key, ht_type)
        n = FFI::MemoryPointer.new(:int)
        ret = LibHTS.bcf_get_format_values(
          @record.bcf.header.struct, @record.struct, key, @p1, n, ht_type
        )
        return nil if ret < 0

        yield @p1.read_pointer, n.read_int
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Thin wrapper around a BCF/VCF header struct.
    class Header
      # @return [Object] the wrapped header struct, exactly as it was handed to
      #   the constructor.
      attr_reader :struct

      # @param h [Object] header struct to wrap; must respond to +to_ptr+ for
      #   {#to_ptr} to work.
      def initialize(h)
        @struct = h
      end

      # @return [Object] whatever the wrapped struct's own +to_ptr+ returns.
      def to_ptr
        struct.to_ptr
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # Reader for the INFO fields of a single record.
    class Info
      # @param record [Object] the owning record; must respond to +struct+,
      #   +p1+ (destination-pointer slot) and +bcf+ (which exposes
      #   +header.struct+).
      def initialize(record)
        @record = record
      end

      # Fetch the value stored under an INFO tag.
      #
      # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
      # @param key [String] INFO tag name.
      # @param type [Symbol, String, nil] one of :int/:int32, :float/:real,
      #   :flag/:bool, :string/:str. When nil the type is looked up from the
      #   record's own INFO fields.
      # @return [Array<Integer>, Array<Float>, Boolean, String, nil] the decoded
      #   value. nil when the type has to be inferred and the record carries no
      #   field named +key+, when htslib reports a negative status, or when
      #   +type+ is not a recognised name.
      # @raise [RuntimeError] for an unknown stored type code or an unexpected
      #   status from bcf_get_info_flag.
      def get(key, type = nil)
        type ||= inferred_type(key)
        # Without this guard an absent key used to be "typed" by the number of
        # INFO fields in the record (the return value of Integer#times).
        return nil unless type

        case type.to_sym
        when :int, :int32
          read_values(key, LibHTS::BCF_HT_INT) { |buf, len| buf.read_array_of_int32(len) }
        when :float, :real
          read_values(key, LibHTS::BCF_HT_REAL) { |buf, len| buf.read_array_of_float(len) }
        when :flag, :bool
          read_flag(key)
        when :string, :str
          read_values(key, LibHTS::BCF_HT_STR) { |buf, _len| buf.read_string }
        end
      end

      # Describe every INFO field present on the record.
      #
      # @return [Array<Hash>] one hash per field with the keys
      #   +:name+ (from bcf_hdr_int2id), +:n+ (from bcf_hdr_id2number),
      #   +:vtype+ (the field's stored type code) and +:i+ (the field's key id).
      def fields
        hdr = @record.bcf.header.struct
        info_fields.map do |fld|
          {
            name: LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, fld[:key]),
            n: LibHTS.bcf_hdr_id2number(hdr, LibHTS::BCF_HL_INFO, fld[:key]),
            vtype: fld[:type], i: fld[:key]
          }
        end
      end

      private

      # Wraps each entry of the record's INFO array in a LibHTS::BcfInfo.
      def info_fields
        rec = @record.struct
        Array.new(rec[:n_info]) do |i|
          LibHTS::BcfInfo.new(rec[:d][:info] + i * LibHTS::BcfInfo.size)
        end
      end

      # Type name for +key+ derived from the record, or nil if +key+ is absent.
      def inferred_type(key)
        code = get_info_type(key)
        code.nil? ? nil : info_type_to_string(code)
      end

      # Stored type code of the INFO field named +key+, or nil if the record
      # has no such field.
      def get_info_type(key)
        hdr = @record.bcf.header.struct
        fld = info_fields.find do |f|
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, f[:key]) == key
        end
        fld && fld[:type]
      end

      # Maps a stored type code to the type name understood by {#get}.
      def info_type_to_string(t)
        case t
        when 0 then :flag
        when 1, 2, 3, 4 then :int
        when 5 then :float
        when 7 then :string
        else
          raise "Unknown info type: #{t}"
        end
      end

      # Calls bcf_get_info_values for +key+ and yields the destination buffer
      # together with the element count reported through the size out-parameter.
      # Returns nil when the call reports a negative status.
      def read_values(key, ht_type)
        n = FFI::MemoryPointer.new(:int)
        p1 = @record.p1
        ret = LibHTS.bcf_get_info_values(
          @record.bcf.header.struct, @record.struct, key, p1, n, ht_type
        )
        return nil if ret < 0

        yield p1.read_pointer, n.read_int
      end

      # Translates the status of bcf_get_info_flag into true / false / nil.
      def read_flag(key)
        n = FFI::MemoryPointer.new(:int)
        ret = LibHTS.bcf_get_info_flag(
          @record.bcf.header.struct, @record.struct, key, @record.p1, n
        )
        case ret
        when 1 then true
        when 0 then false
        when -1 then nil
        else
          raise "Unknown return value from bcf_get_info_flag: #{ret}"
        end
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # A single VCF/BCF record together with the file it was read from.
    class Record
      # @param bcf_t [Object] the record struct (indexable by field name).
      # @param bcf [HTS::Bcf] the file that produced the record; used for
      #   header lookups.
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return [Object] the underlying record struct.
      def struct
        @bcf1
      end

      # Pointer to the record struct itself, matching what Header#to_ptr and
      # Bcf#to_ptr do for their own structs. (This used to return the owning
      # file's pointer.)
      def to_ptr
        @bcf1.to_ptr
      end

      # def inspect; end

      def formats; end

      def genotypes; end

      # @return [String] name that the header associates with the record's rid.
      def chrom
        hdr = @bcf.header.struct
        rid = @bcf1[:rid]

        LibHTS.bcf_hdr_id2name(hdr, rid)
      end

      # @return [Integer] the stored position plus one.
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the stored position, unmodified.
      def start
        @bcf1[:pos]
      end

      # @return [Integer] the stored position plus the record's rlen.
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      def id
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
        @bcf1[:d][:id]
      end

      # Filter name(s) attached to the record.
      #
      # @return [String, Array<String>] "PASS" when no filter is set, the single
      #   filter name when exactly one is set, otherwise an array with one name
      #   per filter (previously three or more filters yielded nil).
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]
        hdr = @bcf.header.struct

        case n_flt
        when 0
          "PASS"
        when 1
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, d[:flt].read_int)
        else
          d[:flt].get_array_of_int(0, n_flt).map do |i|
            LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, i)
          end
        end
      end

      def qual
        @bcf1[:qual]
      end

      # @return [String] the first allele.
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first; empty when the
      #   record has fewer than two alleles.
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        n_alt = @bcf1[:n_allele] - 1
        # Avoid handing a zero or negative count to the pointer reader.
        return [] if n_alt < 1

        @bcf1[:d][:allele].get_array_of_pointer(
          FFI::TYPE_POINTER.size, n_alt
        ).map(&:read_string)
      end

      # @return [Array<String>] all alleles, reference first.
      def alleles
        # Unpack first, exactly as #ref and #alt do, so the allele array is
        # populated even when this is the first accessor called on the record.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [HTS::Bcf::Info] INFO accessor bound to this record.
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [HTS::Bcf::Format] FORMAT accessor bound to this record.
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Text form of the record as produced by vcf_format.
      #
      # @raise [RuntimeError] if vcf_format returns -1.
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1

        # NOTE(review): the buffer behind ksr[:s] is presumably allocated by
        # htslib; confirm whether it needs to be released explicitly.
        ksr[:s]
      end
    end
  end
end
data/lib/hts/bcf.rb ADDED
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "bcf/header"
7
+ require_relative "bcf/info"
8
+ require_relative "bcf/format"
9
+ require_relative "bcf/record"
10
+
11
module HTS
  # Reader for VCF/BCF files. Enumerable over the file's records.
  class Bcf
    include Enumerable

    attr_reader :file_path, :mode, :header
    # HtfFile is FFI::BitStruct
    attr_reader :htf_file

    class << self
      alias open new
    end

    # Open a VCF/BCF file and read its header.
    #
    # When a block is given the new object is yielded and the file is closed
    # once the block finishes, whether or not it raised.
    #
    # @param file_path [String] path to the file; expanded with File.expand_path.
    # @param mode [String] mode string passed through to hts_open.
    # @raise [RuntimeError] if the file does not exist, cannot be opened, or
    #   its header cannot be read.
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)

      unless File.exist?(file_path)
        message = "No such VCF/BCF file - #{file_path}"
        raise message
      end

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      raise "Failed to open VCF/BCF file - #{file_path}" if null_handle?(@htf_file)

      hdr = LibHTS.bcf_hdr_read(htf_file)
      if null_handle?(hdr)
        # Do not leave the file handle open behind a failed constructor.
        close
        raise "Failed to read VCF/BCF header - #{file_path}"
      end
      @header = Bcf::Header.new(hdr)

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return [Object] the file handle returned by hts_open.
    def struct
      htf_file
    end

    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file. Calling it again is a no-op, so the handle is
    # never passed to hts_close twice.
    #
    # @return [Object, nil] the result of hts_close on the first call, nil on
    #   later calls.
    def close
      return if @closed

      @closed = true
      LibHTS.hts_close(htf_file)
    end

    # Yield every remaining record in the file.
    #
    # @yieldparam record [HTS::Bcf::Record]
    # @return [self, Enumerator] self when a block is given, otherwise an
    #   Enumerator over the records.
    # @raise [RuntimeError] if bcf_read reports a status below -1.
    def each
      return to_enum(__method__) unless block_given?

      while (bcf1 = read_next)
        yield Record.new(bcf1, self)
      end
      self
    end

    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end

    private

    # Reads one record into a freshly initialised struct, so records yielded
    # earlier keep their own storage.
    #
    # @return [Object, nil] the filled record struct, or nil when bcf_read
    #   returns -1.
    # @raise [RuntimeError] when bcf_read returns a status below -1; the old
    #   `!= -1` loop condition treated that as a successfully read record.
    def read_next
      # NOTE(review): the struct allocated for the final, unsuccessful read is
      # never handed to a Record; confirm how bcf_init results are released.
      bcf1 = LibHTS.bcf_init
      status = LibHTS.bcf_read(htf_file, header, bcf1)
      return nil if status == -1
      raise "Failed to read record from #{file_path} (bcf_read returned #{status})" if status < -1

      bcf1
    end

    # True when a native call handed back nothing usable: either nil or an
    # object that reports itself as a null pointer.
    def null_handle?(handle)
      handle.nil? || (handle.respond_to?(:null?) && handle.null?)
    end
  end
end
data/lib/hts/faidx.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
module HTS
  # Accessor for a FASTA index loaded through LibHTS.fai_load.
  class Faidx
    attr_reader :file_path

    class << self
      alias open new
    end

    # Load the index for a FASTA file.
    #
    # When a block is given the new object is yielded and the index is
    # released once the block finishes, whether or not it raised.
    #
    # @param file_path [String] location of the FASTA file. The expanded form
    #   is exposed through {#file_path}; the value is handed to fai_load
    #   exactly as given.
    # @raise [RuntimeError] if fai_load returns no usable handle.
    def initialize(file_path)
      @file_path = File.expand_path(file_path)
      @fai = LibHTS.fai_load(file_path)
      raise "Failed to load FASTA index - #{file_path}" if null_handle?(@fai)

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # Release the index. Calling it again is a no-op, so the handle is never
    # passed to fai_destroy twice.
    def close
      return if @closed

      @closed = true
      LibHTS.fai_destroy(@fai)
    end

    # the number of sequences in the index.
    def size
      LibHTS.faidx_nseq(@fai)
    end
    alias length size

    # return the length of the requested chromosome.
    #
    # @param chrom [String, Symbol] sequence name.
    # @return [Integer, nil] the length, or nil when faidx_seq_len reports -1.
    # @raise [ArgumentError] if +chrom+ is neither a String nor a Symbol.
    def chrom_size(chrom)
      unless chrom.is_a?(String) || chrom.is_a?(Symbol)
        # FIXME
        raise ArgumentError, "Expect chrom to be String or Symbol"
      end

      chrom = chrom.to_s
      result = LibHTS.faidx_seq_len(@fai, chrom)
      result == -1 ? nil : result
    end
    alias chrom_length chrom_size

    # FIXME: naming and syntax
    def cget; end

    # FIXME: naming and syntax
    def get; end

    # __iter__

    private

    # True when a native call handed back nothing usable: either nil or an
    # object that reports itself as a null pointer.
    def null_handle?(handle)
      handle.nil? || (handle.respond_to?(:null?) && handle.null?)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # Ruby-FFI extensions
2
+
3
+ * Add syntax sugar
4
+ * union_layout
5
+ * struct_layout
6
+
7
+ * Support for bit fields
8
+ * [ffi-bitfield](https://github.com/kojix2/ffi-bitfield)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ffi/bit_field'
4
+
5
module FFI
  class Struct
    class << self
      # Build an anonymous FFI::Union subclass whose layout is +args+, so a
      # union member can be declared inline inside another layout.
      #
      # @example HtsOpt
      #   class HtsOpt < FFI::Struct
      #     layout \
      #       :arg,     :string,
      #       :opt,     HtsFmtOption,
      #       :val,
      #       union_layout(
      #         :i,     :int,
      #         :s,     :string
      #       ),
      #       :next,    HtsOpt.ptr
      #   end
      #
      # @param args [Array] field list forwarded unchanged to +layout+.
      # @return [Class] the new anonymous FFI::Union subclass.
      def union_layout(*args)
        anonymous = Class.new(FFI::Union)
        anonymous.class_eval { layout(*args) }
        anonymous
      end

      # Build an anonymous FFI::Struct subclass whose layout is +args+, so a
      # nested struct member can be declared inline inside another layout.
      #
      # @example HtsFormat
      #   class HtsFormat < FFI::Struct
      #     layout \
      #       :category,          HtsFormatCategory,
      #       :format,            HtsExactFormat,
      #       :version,
      #       struct_layout(
      #         :major,           :short,
      #         :minor,           :short
      #       ),
      #       :compression,       HtsCompression,
      #       :compression_level, :short,
      #       :specific,          :pointer
      #   end
      #
      # @param args [Array] field list forwarded unchanged to +layout+.
      # @return [Class] the new anonymous FFI::Struct subclass.
      def struct_layout(*args)
        anonymous = Class.new(FFI::Struct)
        anonymous.class_eval { layout(*args) }
        anonymous
      end
    end
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # BGZF
4
4
  module HTS
5
- module FFI
5
+ module LibHTS
6
6
  # Open an existing file descriptor for reading or writing.
7
7
  attach_function \
8
8
  :bgzf_dopen,
@@ -120,7 +120,7 @@ module HTS
120
120
  # Read one line from a BGZF file. It is faster than bgzf_getc()
121
121
  attach_function \
122
122
  :bgzf_getline,
123
- [BGZF, :int, Kstring],
123
+ [BGZF, :int, KString],
124
124
  :int
125
125
 
126
126
  # Read the next BGZF block.