htslib 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # A single VCF/BCF record.
    #
    # Wraps the record struct handed over by Bcf#each together with the owning
    # Bcf object, whose header is needed to translate numeric ids into names.
    class Record
      # @param bcf_t the record struct (accessed with +[]+, e.g. +[:pos]+, +[:d]+)
      # @param bcf [HTS::Bcf] the file object this record was read from
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return the underlying record struct
      def struct
        @bcf1
      end

      # NOTE(review): this returns the pointer of the owning Bcf object, not of
      # the record struct itself - confirm that this is intended.
      def to_ptr
        @bcf.to_ptr
      end

      # def inspect; end

      # Not implemented yet.
      def formats; end

      # Not implemented yet.
      def genotypes; end

      # Name of the contig this record belongs to.
      #
      # @return the contig key from the header dictionary, or nil when the
      #   header is null or the record's +rid+ is outside the contig table
      def chrom
        hdr = @bcf.header.struct
        rid = @bcf1[:rid]

        return nil if hdr.null? || rid.negative? || rid >= hdr[:n][LibHTS::BCF_DT_CTG]

        # Same id -> key lookup that #filter uses, applied to the contig dictionary.
        LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_CTG, rid)
      end

      # @return [Integer] the struct's +pos+ field plus one
      #   (presumably the 1-based VCF coordinate; the original FIXME is kept)
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # @return [Integer] the struct's raw +pos+ field
      def start
        @bcf1[:pos]
      end

      # @return [Integer] +pos+ plus +rlen+
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # @return the record's ID field
      def id
        # The ID lives in the string section of the record, so BCF_UN_STR is
        # enough; there is no need to decode FILTER and INFO as well.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:id]
      end

      # FILTER column of the record.
      #
      # @return "PASS" when no filter id is stored, a single name when exactly
      #   one filter is stored, otherwise an Array with one name per filter
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]
        hdr = @bcf.header.struct

        case n_flt
        when 0
          "PASS"
        when 1
          LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, d[:flt].read_int)
        else
          # Any number of filters above one is returned as an Array (the
          # previous `when 2` branch silently returned nil for three or more).
          d[:flt].get_array_of_int(0, n_flt).map do |i|
            LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_ID, i)
          end
        end
      end

      # @return the struct's +qual+ field
      def qual
        @bcf1[:qual]
      end

      # @return [String] the first allele (REF)
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # @return [Array<String>] every allele after the first one (ALT);
      #   empty when the record has no alternate allele
      def alt
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        n_alt = @bcf1[:n_allele] - 1
        # Never ask FFI for a negative number of pointers.
        return [] unless n_alt.positive?

        # Skip the first pointer (REF) and read the remaining ones.
        @bcf1[:d][:allele]
          .get_array_of_pointer(FFI::TYPE_POINTER.size, n_alt)
          .map(&:read_string)
      end

      # @return [Array<String>] all alleles, REF first
      def alleles
        # The allele table is only populated after unpacking the string
        # section, exactly as #ref and #alt do.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele]
          .get_array_of_pointer(0, @bcf1[:n_allele])
          .map(&:read_string)
      end

      # @return [Info] accessor object for the INFO fields of this record
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [Format] accessor object for the FORMAT fields of this record
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Format the record through LibHTS.vcf_format.
      #
      # @return the +:s+ member of the KString filled in by vcf_format
      # @raise [RuntimeError] when vcf_format returns -1
      def to_s
        ksr = LibHTS::KString.new
        raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1

        # NOTE(review): the buffer htslib allocates inside the KString is not
        # released here - confirm who owns it.
        ksr[:s]
      end
    end
  end
end
data/lib/hts/bcf.rb ADDED
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "bcf/header"
7
+ require_relative "bcf/record"
8
+ require_relative "bcf/info"
9
+ require_relative "bcf/format"
10
+ require_relative "utils/open_method"
11
+
12
module HTS
  # Reader for VCF/BCF files on top of LibHTS.
  #
  # Includes Enumerable; #each yields one Bcf::Record per record read.
  class Bcf
    include Enumerable
    extend Utils::OpenMethod

    attr_reader :file_path, :mode, :header
    # Handle returned by LibHTS.hts_open; nil once #close has been called.
    attr_reader :htf_file

    class << self
      # Defined directly on the singleton class, so it takes precedence over
      # Utils::OpenMethod#open and forwards +mode+ and the block to #initialize.
      alias open new
    end

    # Open +file_path+, read its header and allocate the record buffer.
    #
    # When a block is given the object is yielded and closed afterwards, even
    # if the block raises.
    #
    # @param file_path [String] path of the VCF/BCF file (expanded internally)
    # @param mode [String] mode string handed to LibHTS.hts_open
    # @raise [RuntimeError] when the file does not exist or cannot be opened
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)

      unless File.exist?(file_path)
        message = "No such VCF/BCF file - #{file_path}"
        raise message
      end

      @file_path = file_path
      @mode      = mode
      @htf_file  = LibHTS.hts_open(file_path, mode)
      # Fail here with a readable message instead of handing a NULL handle
      # to the calls below.
      raise "Failed to open VCF/BCF file - #{file_path}" if @htf_file.nil? || @htf_file.null?

      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # FIXME: should be defined here?
      @bcf1 = LibHTS.bcf_init

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return the file handle returned by hts_open
    def struct
      htf_file
    end

    # @return the pointer of the file handle
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    #
    # Calling it again after the file has been closed is a no-op, so the
    # handle is never passed to hts_close twice.
    #
    # @return the result of LibHTS.hts_close, or nil when already closed
    def close
      return if @htf_file.nil?

      status = LibHTS.hts_close(@htf_file)
      @htf_file = nil
      status
    end

    # Iterate over the records of the file.
    #
    # NOTE: every yielded Record wraps the same record buffer, which is
    # overwritten by the next read; do not keep a Record beyond its iteration.
    #
    # @yieldparam record [Bcf::Record]
    # @return [Enumerator] when no block is given, otherwise self
    # @raise [RuntimeError] when the file is closed or bcf_read reports an error
    def each
      return to_enum(__method__) unless block_given?
      raise "VCF/BCF file is closed - #{file_path}" if @htf_file.nil?

      # bcf_read returns -1 at end of file and a value below -1 on a critical
      # error; the error case must stop the loop instead of being treated as
      # a record.
      while (status = LibHTS.bcf_read(htf_file, header, @bcf1)) != -1
        raise "Failed to read record from #{file_path} (bcf_read returned #{status})" if status < -1

        yield Record.new(@bcf1, self)
      end
      self
    end

    # @return the sample count stored in the header
    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
@@ -3,27 +3,17 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "utils/open_method"
7
+
6
8
  module HTS
7
- class Fai
8
- # FIXME: API
9
- def self.open(path)
10
- fai = new(path)
11
- if block_given?
12
- yield(fai)
13
- fai.close
14
- else
15
- fai
16
- end
17
- end
9
+ class Faidx
10
+ extend Utils::OpenMethod
18
11
 
19
- def initialize(path)
20
- @path = File.expand_path(path)
21
- @path.delete_suffix!(".fai")
22
- LibHTS.fai_build(@path) unless File.exist?("#{@path}.fai")
23
- @fai = LibHTS.fai_load(@path)
24
- raise if @fai.null?
12
+ attr_reader :file_path
25
13
 
26
- # at_exit{LibHTS.fai_destroy(@fai)}
14
# Load the FASTA index for +file_path+.
#
# @param file_path [String] path of the FASTA file
# @raise [RuntimeError] when LibHTS.fai_load does not return a usable index
def initialize(file_path)
  @file_path = File.expand_path(file_path)
  # Pass the expanded path on: the C library does not expand "~" itself, and
  # this keeps the loaded index consistent with what #file_path reports.
  @fai = LibHTS.fai_load(@file_path)
  # Fail at construction time rather than on the first query against a NULL index.
  raise "Failed to load FASTA index - #{@file_path}" if @fai.nil? || @fai.null?
end
28
18
 
29
19
  def close
@@ -38,7 +28,10 @@ module HTS
38
28
 
39
29
  # return the length of the requested chromosome.
40
30
  def chrom_size(chrom)
41
- raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
31
+ unless chrom.is_a?(String) || chrom.is_a?(Symbol)
32
+ # FIXME
33
+ raise ArgumentError, "Expect chrom to be String or Symbol"
34
+ end
42
35
 
43
36
  chrom = chrom.to_s
44
37
  result = LibHTS.faidx_seq_len(@fai, chrom)
@@ -0,0 +1,8 @@
1
+ # Ruby-FFI extensions
2
+
3
+ * Add syntax sugar
4
+ * union_layout
5
+ * struct_layout
6
+
7
+ * Support for bit fields
8
+ * [ffi-bitfield](https://github.com/kojix2/ffi-bitfield)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ffi/bit_struct"
4
+
5
+ module FFI
6
+ class Struct
7
+ class << self
8
+ # @example HtsOpt
9
+ # class HtsOpt < FFI::Struct
10
+ # layout \
11
+ # :arg, :string,
12
+ # :opt, HtsFmtOption,
13
+ # :val,
14
+ # union_layout(
15
+ # :i, :int,
16
+ # :s, :string
17
+ # ),
18
+ # :next, HtsOpt.ptr
19
+ # end
20
+
21
+ def union_layout(*args)
22
+ Class.new(FFI::Union) { layout(*args) }
23
+ end
24
+
25
+ # @example HtsFormat
26
+ # class HtsFormat < FFI::Struct
27
+ # layout \
28
+ # :category, HtsFormatCategory,
29
+ # :format, HtsExactFormat,
30
+ # :version,
31
+ # struct_layout(
32
+ # :major, :short,
33
+ # :minor, :short
34
+ # ),
35
+ # :compression, HtsCompression,
36
+ # :compression_level, :short,
37
+ # :specific, :pointer
38
+ # end
39
+
40
+ def struct_layout(*args)
41
+ Class.new(FFI::Struct) { layout(*args) }
42
+ end
43
+ end
44
+ end
45
+ end
@@ -362,7 +362,7 @@ module HTS
362
362
  :format, FaiFormatOptions
363
363
  end
364
364
 
365
- # vcf
365
+ # bcf
366
366
 
367
367
  class BcfVariant < FFI::Struct
368
368
  layout \
@@ -416,7 +416,7 @@ module HTS
416
416
 
417
417
  class BcfIdinfo < FFI::Struct
418
418
  layout \
419
- :info, [:uint8, 3],
419
+ :info, [:uint64_t, 3],
420
420
  :hrec, [BcfHrec.ptr, 3],
421
421
  :id, :int
422
422
  end
@@ -430,7 +430,7 @@ module HTS
430
430
  class BcfHdr < FFI::Struct
431
431
  layout \
432
432
  :n, [:int, 3],
433
- :id, [BcfIdpair.ptr, 3],
433
+ :id, [:pointer, 3], # BcfIdpair.ptr
434
434
  :dict, [:pointer, 3],
435
435
  :samples, :pointer,
436
436
  :hrec, :pointer,
@@ -15,7 +15,6 @@ module HTS
15
15
  BAM_CBACK = 9
16
16
 
17
17
  BAM_CIGAR_STR = "MIDNSHP=XB"
18
- BAM_CIGAR_STR_PADDED = "MIDNSHP=XB??????"
19
18
  BAM_CIGAR_SHIFT = 4
20
19
  BAM_CIGAR_MASK = 0xf
21
20
  BAM_CIGAR_TYPE = 0x3C1A7
@@ -57,9 +57,116 @@ module HTS
57
57
 
58
58
  # macros
59
59
  class << self
60
+ alias bcf_open hts_open
61
+ alias vcf_open hts_open
62
+ alias bcf_close hts_close
63
+ alias vcf_close hts_close
64
+
60
65
  def bcf_hdr_nsamples(hdr)
61
66
  hdr[:n][BCF_DT_SAMPLE]
62
67
  end
68
+
69
+ def bcf_update_info_int32(hdr, line, key, values, n)
70
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
71
+ end
72
+
73
+ def bcf_update_info_float(hdr, line, key, values, n)
74
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
75
+ end
76
+
77
+ def bcf_update_info_flag(hdr, line, key, string, n)
78
+ bcf_update_info(hdr, line, key, string, n, BCF_HT_FLAG)
79
+ end
80
+
81
+ def bcf_update_info_string(hdr, line, key, string)
82
+ bcf_update_info(hdr, line, key, string, 1, BCF_HT_STR)
83
+ end
84
+
85
+ def bcf_update_format_int32(hdr, line, key, values, n)
86
+ bcf_update_format(hdr, line, key, values, n,
87
+ BCF_HT_INT)
88
+ end
89
+
90
+ def bcf_update_format_float(hdr, line, key, values, n)
91
+ bcf_update_format(hdr, line, key, values, n,
92
+ BCF_HT_REAL)
93
+ end
94
+
95
+ def bcf_update_format_char(hdr, line, key, values, n)
96
+ bcf_update_format(hdr, line, key, values, n,
97
+ BCF_HT_STR)
98
+ end
99
+
100
+ def bcf_update_genotypes(hdr, line, gts, n)
101
+ bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT)
102
+ end
103
+
104
+ def bcf_gt_phased(idx)
105
+ ((idx + 1) << 1 | 1)
106
+ end
107
+
108
+ def bcf_gt_unphased(idx)
109
+ ((idx + 1) << 1)
110
+ end
111
+
112
+ def bcf_gt_missing
113
+ 0
114
+ end
115
+
116
# Tell whether an encoded genotype value is the "missing allele" code.
#
# The sibling helpers encode an allele as ((idx + 1) << 1 | phase_bit), so
# a missing allele is a value whose bits above the phase bit are all zero.
#
# @param val [Integer] encoded genotype value
# @return [Integer] 1 when the allele is missing, 0 otherwise. This is a
#   C-style flag like bcf_gt_is_phased; 0 is truthy in Ruby, so compare it
#   against 1 explicitly.
def bcf_gt_is_missing(val)
  # Every Integer is truthy in Ruby, so the C ternary `val >> 1 ? 0 : 1`
  # needs an explicit comparison with zero.
  (val >> 1).zero? ? 1 : 0
end
119
+
120
+ def bcf_gt_is_phased(idx)
121
+ ((idx) & 1)
122
+ end
123
+
124
+ def bcf_gt_allele(val)
125
+ (((val) >> 1) - 1)
126
+ end
127
+
128
+ def bcf_alleles2gt(a, b)
129
+ ((a) > (b) ? (a * (a + 1) / 2 + b) : (b * (b + 1) / 2 + a))
130
+ end
131
+
132
+ def bcf_get_info_int32(hdr, line, tag, dst, ndst)
133
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
134
+ end
135
+
136
+ def bcf_get_info_float(hdr, line, tag, dst, ndst)
137
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
138
+ end
139
+
140
+ def bcf_get_info_string(hdr, line, tag, dst, ndst)
141
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
142
+ end
143
+
144
+ def bcf_get_info_flag(hdr, line, tag, dst, ndst)
145
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG)
146
+ end
147
+
148
+ def bcf_get_format_int32(hdr, line, tag, dst, ndst)
149
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
150
+ end
151
+
152
+ def bcf_get_format_float(hdr, line, tag, dst, ndst)
153
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
154
+ end
155
+
156
+ def bcf_get_format_char(hdr, line, tag, dst, ndst)
157
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
158
+ end
159
+
160
+ def bcf_get_genotypes(hdr, line, dst, ndst)
161
+ bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT)
162
+ end
163
+
164
+ def bcf_hdr_int2id(hdr, type, int_id)
165
+ LibHTS::BcfIdpair.new(
166
+ hdr[:id][type].to_ptr +
167
+ LibHTS::BcfIdpair.size * int_id # offsets
168
+ )[:key]
169
+ end
63
170
  end
64
171
 
65
172
  # constants
data/lib/hts/libhts.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "ffi_ext/struct"
4
+
3
5
  module HTS
4
6
  module LibHTS
5
7
  extend FFI::Library
@@ -18,119 +20,9 @@ module HTS
18
20
  end
19
21
  end
20
22
 
21
- module FFI
22
- class Struct
23
- class << self
24
- # @example HtsOpt
25
- # class HtsOpt < FFI::Struct
26
- # layout \
27
- # :arg, :string,
28
- # :opt, HtsFmtOption,
29
- # :val,
30
- # union_layout(
31
- # :i, :int,
32
- # :s, :string
33
- # ),
34
- # :next, HtsOpt.ptr
35
- # end
36
-
37
- def union_layout(*args)
38
- Class.new(FFI::Union) { layout(*args) }
39
- end
40
-
41
- # @example HtsFormat
42
- # class HtsFormat < FFI::Struct
43
- # layout \
44
- # :category, HtsFormatCategory,
45
- # :format, HtsExactFormat,
46
- # :version,
47
- # struct_layout(
48
- # :major, :short,
49
- # :minor, :short
50
- # ),
51
- # :compression, HtsCompression,
52
- # :compression_level, :short,
53
- # :specific, :pointer
54
- # end
55
-
56
- def struct_layout(*args)
57
- Class.new(FFI::Struct) { layout(*args) }
58
- end
59
- end
60
- end
61
-
62
- # Struct that support bit fields.
63
- # Currently readonly.
64
- class BitStruct < Struct
65
- class << self
66
- module BitFieldsModule
67
- def [](name)
68
- bit_fields = self.class.bit_fields_hash_table
69
- parent, start, width = bit_fields[name]
70
- if parent
71
- (super(parent) >> start) & ((1 << width) - 1)
72
- else
73
- super(name)
74
- end
75
- end
76
- end
77
- private_constant :BitFieldsModule
78
-
79
- attr_reader :bit_fields_hash_table
80
-
81
- # @example Bcf1
82
- # class Bcf1 < FFI::BitStruct
83
- # layout \
84
- # :pos, :hts_pos_t,
85
- # :rlen, :hts_pos_t,
86
- # :rid, :int32_t,
87
- # :qual, :float,
88
- # :_n_info_allele, :uint32_t,
89
- # :_n_fmt_sample, :uint32_t,
90
- # :shared, KString,
91
- # :indiv, KString,
92
- # :d, BcfDec,
93
- # :max_unpack, :int,
94
- # :unpacked, :int,
95
- # :unpack_size, [:int, 3],
96
- # :errcode, :int
97
- #
98
- # bit_fields :_n_info_allele,
99
- # :n_info, 16,
100
- # :n_allele, 16
101
- #
102
- # bit_fields :_n_fmt_sample,
103
- # :n_fmt, 8,
104
- # :n_sample, 24
105
- # end
106
-
107
- def bit_fields(*args)
108
- unless instance_variable_defined?(:@bit_fields_hash_table)
109
- @bit_fields_hash_table = {}
110
- prepend BitFieldsModule
111
- end
112
-
113
- parent = args.shift
114
- labels = []
115
- widths = []
116
- args.each_slice(2) do |l, w|
117
- labels << l
118
- widths << w
119
- end
120
- starts = widths.inject([0]) do |result, w|
121
- result << (result.last + w)
122
- end
123
- labels.zip(starts, widths).each do |l, s, w|
124
- @bit_fields_hash_table[l] = [parent, s, w]
125
- end
126
- end
127
- end
128
- end
129
- end
130
-
131
23
  require_relative "libhts/constants"
132
24
 
133
- # alphabetical order
25
+ # This is alphabetical order.
134
26
  require_relative "libhts/bgzf"
135
27
  require_relative "libhts/faidx"
136
28
  require_relative "libhts/hfile"
@@ -3,8 +3,12 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "utils/open_method"
7
+
6
8
  module HTS
7
- class Tbx
9
+ class Tabix
10
+ extend Utils::OpenMethod
11
+
8
12
  def initialize; end
9
13
 
10
14
  def build; end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  module Utils
    # Class-level +open+ for file-like wrappers: extend a class with this
    # module to get a File.open style constructor.
    module OpenMethod
      # Build an object with +new+ and optionally run a block against it.
      #
      # @param path [String] first argument for the constructor
      # @param args extra positional arguments forwarded to the constructor
      #   (for example a mode string)
      # @yieldparam object the freshly created object
      # @return the new object when no block is given; otherwise the value of
      #   the block, after the object has been closed
      def open(path, *args)
        object = new(path, *args)
        return object unless block_given?

        begin
          yield(object)
        ensure
          # Release the handle even when the block raises.
          object.close
        end
      end
    end
  end
end
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.4"
5
5
  end
data/lib/htslib.rb CHANGED
@@ -40,6 +40,6 @@ module HTS
40
40
  end
41
41
 
42
42
  require_relative "hts/bam"
43
- require_relative "hts/fai"
44
- require_relative "hts/tbx"
45
- require_relative "hts/vcf"
43
+ require_relative "hts/faidx"
44
+ require_relative "hts/tabix"
45
+ require_relative "hts/bcf"