htslib 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bcf
    # One VCF/BCF record: a thin wrapper around the native record struct
    # together with the Bcf reader it was read from (needed for header lookups).
    class Record
      # @param bcf_t the native record struct filled by LibHTS.bcf_read
      # @param bcf [HTS::Bcf] the reader that owns the header
      def initialize(bcf_t, bcf)
        @bcf1 = bcf_t
        @bcf = bcf
        @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
      end

      attr_reader :p1, :bcf

      # @return the underlying native record struct
      def struct
        @bcf1
      end

      # NOTE(review): this returns the owning file's pointer, not the
      # record's own pointer - confirm that this is intended.
      def to_ptr
        @bcf.to_ptr
      end

      # def inspect; end

      # Not implemented yet.
      def formats; end

      # Not implemented yet.
      def genotypes; end

      # Name of the contig this record lies on.
      #
      # @return [String, nil] nil when the header is null or the record's
      #   contig id is outside the header's contig dictionary
      def chrom
        hdr = @bcf.header.struct
        rid = @bcf1[:rid]

        return nil if hdr.null? || rid.negative? || rid >= hdr[:n][LibHTS::BCF_DT_CTG]

        # Same dictionary lookup that #filter uses, instead of repeating the
        # pointer arithmetic here.
        LibHTS.bcf_hdr_int2id(hdr, LibHTS::BCF_DT_CTG, rid)
      end

      # Stored position plus one.
      def pos
        @bcf1[:pos] + 1 # FIXME
      end

      # Position exactly as stored in the record.
      def start
        @bcf1[:pos]
      end

      # Stored position plus the record's reference length.
      def stop
        @bcf1[:pos] + @bcf1[:rlen]
      end

      # The record's ID field.
      def id
        # The ID is decoded together with REF/ALT, so the string section is
        # all that has to be unpacked.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:id]
      end

      # Filter names attached to this record.
      #
      # @return [String, Array<String>] "PASS" when the record carries no
      #   filter, the filter name when it carries exactly one, otherwise an
      #   array with every filter name (any count of two or more)
      def filter
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
        d = @bcf1[:d]
        n_flt = d[:n_flt]

        case n_flt
        when 0
          "PASS"
        when 1
          filter_name(d[:flt].read_int)
        else
          d[:flt].get_array_of_int(0, n_flt).map { |i| filter_name(i) }
        end
      end

      # Value of the record's qual field.
      def qual
        @bcf1[:qual]
      end

      # Reference allele (the first allele string).
      def ref
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_pointer(0).read_string
      end

      # Alternative alleles: every allele after the reference.
      #
      # @return [Array<String>] empty when there is no alternative allele
      def alt
        alleles.drop(1)
      end

      # All alleles, reference first.
      #
      # @return [Array<String>]
      def alleles
        # The allele pointers are only valid once the string section has been
        # unpacked, so do that here rather than relying on an earlier call.
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
        @bcf1[:d][:allele].get_array_of_pointer(
          0, @bcf1[:n_allele]
        ).map(&:read_string)
      end

      # @return [Info] accessor for the record's INFO fields
      def info
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
        Info.new(self)
      end

      # @return [Format] accessor for the record's FORMAT fields
      def format
        LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
        Format.new(self)
      end

      # Formats the record through LibHTS.vcf_format.
      #
      # @raise [RuntimeError] when vcf_format reports failure
      def to_s
        ksr = LibHTS::KString.new
        if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
          raise "Failed to format record"
        end
        # NOTE(review): the buffer behind ksr[:s] is presumably allocated by
        # htslib and is never released here - confirm ownership.
        ksr[:s]
      end

      private

      # Resolves a numeric filter id through the header's ID dictionary.
      def filter_name(int_id)
        LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, int_id)
      end
    end
  end
end
data/lib/hts/bcf.rb ADDED
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "bcf/header"
7
+ require_relative "bcf/record"
8
+ require_relative "bcf/info"
9
+ require_relative "bcf/format"
10
+ require_relative "utils/open_method"
11
+
12
module HTS
  # Reader for VCF/BCF files. Opens the file through LibHTS, reads the header
  # once, and iterates over the records with #each (Enumerable is mixed in).
  class Bcf
    include Enumerable
    extend Utils::OpenMethod

    attr_reader :file_path, :mode, :header
    # Handle returned by LibHTS.hts_open (original note: HtfFile is FFI::BitStruct).
    attr_reader :htf_file

    class << self
      # Bcf.open is Bcf.new; this singleton alias takes precedence over the
      # #open supplied by Utils::OpenMethod, so the block handling in
      # #initialize is what runs.
      alias open new
    end

    # @param file_path [String] path to the VCF/BCF file (expanded before use)
    # @param mode [String] mode string handed to LibHTS.hts_open
    # @yield [Bcf] when a block is given, yields the reader and closes it
    #   afterwards, even if the block raises
    # @raise [RuntimeError] when the file does not exist
    def initialize(file_path, mode = "r")
      file_path = File.expand_path(file_path)

      unless File.exist?(file_path)
        message = "No such VCF/BCF file - #{file_path}"
        raise message
      end

      @file_path = file_path
      @mode = mode
      @htf_file = LibHTS.hts_open(file_path, mode)
      @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))

      # FIXME: should be defined here?
      # A single record buffer that #each refills on every iteration.
      @bcf1 = LibHTS.bcf_init

      # IO like API
      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # @return the file handle struct
    def struct
      htf_file
    end

    # @return the pointer of the file handle
    def to_ptr
      htf_file.to_ptr
    end

    # Close the current file.
    def close
      LibHTS.hts_close(htf_file)
    end

    # Reads the file record by record.
    #
    # Every yielded Record wraps the same buffer (@bcf1), so a record is only
    # valid until the next one is read.
    #
    # @yield [Record]
    # @return [Enumerator] when called without a block
    # @return [self] when called with a block
    # @raise [RuntimeError] when bcf_read reports an error (a status below -1)
    def each
      return to_enum(__method__) unless block_given?

      # -1 marks the end of the file; anything lower is an error and must not
      # be treated as "keep reading".
      until (status = LibHTS.bcf_read(htf_file, header, @bcf1)) == -1
        raise "Failed to read record from #{file_path} (status #{status})" if status < -1

        yield Record.new(@bcf1, self)
      end
      self
    end

    # @return [Integer] number of samples declared in the header
    def n_samples
      LibHTS.bcf_hdr_nsamples(header.struct)
    end
  end
end
@@ -3,27 +3,17 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "utils/open_method"
7
+
6
8
  module HTS
7
- class Fai
8
- # FIXME: API
9
- def self.open(path)
10
- fai = new(path)
11
- if block_given?
12
- yield(fai)
13
- fai.close
14
- else
15
- fai
16
- end
17
- end
9
+ class Faidx
10
+ extend Utils::OpenMethod
18
11
 
19
- def initialize(path)
20
- @path = File.expand_path(path)
21
- @path.delete_suffix!(".fai")
22
- LibHTS.fai_build(@path) unless File.exist?("#{@path}.fai")
23
- @fai = LibHTS.fai_load(@path)
24
- raise if @fai.null?
12
+ attr_reader :file_path
25
13
 
26
- # at_exit{LibHTS.fai_destroy(@fai)}
14
# Loads the FASTA index for +file_path+.
#
# @param file_path [String] path to the FASTA file; expanded before use
# @raise [RuntimeError] when the index cannot be loaded
def initialize(file_path)
  @file_path = File.expand_path(file_path)
  # Hand htslib the expanded path, not the raw argument, so that what is
  # loaded matches what #file_path reports.
  @fai = LibHTS.fai_load(@file_path)
  # Fail here instead of passing a null handle to later native calls.
  raise "Failed to load FASTA index - #{@file_path}" if @fai.nil? || @fai.null?
end
28
18
 
29
19
  def close
@@ -38,7 +28,10 @@ module HTS
38
28
 
39
29
  # return the length of the requested chromosome.
40
30
  def chrom_size(chrom)
41
- raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
31
+ unless chrom.is_a?(String) || chrom.is_a?(Symbol)
32
+ # FIXME
33
+ raise ArgumentError, "Expect chrom to be String or Symbol"
34
+ end
42
35
 
43
36
  chrom = chrom.to_s
44
37
  result = LibHTS.faidx_seq_len(@fai, chrom)
@@ -0,0 +1,8 @@
1
+ # Ruby-FFI extensions
2
+
3
+ * Add syntax sugar
4
+ * union_layout
5
+ * struct_layout
6
+
7
+ * Support for bit fields
8
+ * [ffi-bitfield](https://github.com/kojix2/ffi-bitfield)
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ffi/bit_struct"
4
+
5
module FFI
  # Layout helpers added to FFI::Struct so that nested anonymous unions and
  # structs can be declared inline inside a `layout` call.
  class Struct
    # Builds an anonymous FFI::Union subclass with the given layout.
    #
    # @example HtsOpt
    #   class HtsOpt < FFI::Struct
    #     layout \
    #       :arg,    :string,
    #       :opt,    HtsFmtOption,
    #       :val,
    #       union_layout(
    #         :i,    :int,
    #         :s,    :string
    #       ),
    #       :next,   HtsOpt.ptr
    #   end
    def self.union_layout(*args)
      Class.new(FFI::Union).tap do |anonymous|
        anonymous.layout(*args)
      end
    end

    # Builds an anonymous FFI::Struct subclass with the given layout.
    #
    # @example HtsFormat
    #   class HtsFormat < FFI::Struct
    #     layout \
    #       :category,          HtsFormatCategory,
    #       :format,            HtsExactFormat,
    #       :version,
    #       struct_layout(
    #         :major,           :short,
    #         :minor,           :short
    #       ),
    #       :compression,       HtsCompression,
    #       :compression_level, :short,
    #       :specific,          :pointer
    #   end
    def self.struct_layout(*args)
      Class.new(FFI::Struct).tap do |anonymous|
        anonymous.layout(*args)
      end
    end
  end
end
@@ -362,7 +362,7 @@ module HTS
362
362
  :format, FaiFormatOptions
363
363
  end
364
364
 
365
- # vcf
365
+ # bcf
366
366
 
367
367
  class BcfVariant < FFI::Struct
368
368
  layout \
@@ -416,7 +416,7 @@ module HTS
416
416
 
417
417
  class BcfIdinfo < FFI::Struct
418
418
  layout \
419
- :info, [:uint8, 3],
419
+ :info, [:uint64_t, 3],
420
420
  :hrec, [BcfHrec.ptr, 3],
421
421
  :id, :int
422
422
  end
@@ -430,7 +430,7 @@ module HTS
430
430
  class BcfHdr < FFI::Struct
431
431
  layout \
432
432
  :n, [:int, 3],
433
- :id, [BcfIdpair.ptr, 3],
433
+ :id, [:pointer, 3], # BcfIdpair.ptr
434
434
  :dict, [:pointer, 3],
435
435
  :samples, :pointer,
436
436
  :hrec, :pointer,
@@ -15,7 +15,6 @@ module HTS
15
15
  BAM_CBACK = 9
16
16
 
17
17
  BAM_CIGAR_STR = "MIDNSHP=XB"
18
- BAM_CIGAR_STR_PADDED = "MIDNSHP=XB??????"
19
18
  BAM_CIGAR_SHIFT = 4
20
19
  BAM_CIGAR_MASK = 0xf
21
20
  BAM_CIGAR_TYPE = 0x3C1A7
@@ -57,9 +57,116 @@ module HTS
57
57
 
58
58
  # macros
59
59
  class << self
60
+ alias bcf_open hts_open
61
+ alias vcf_open hts_open
62
+ alias bcf_close hts_close
63
+ alias vcf_close hts_close
64
+
60
65
  def bcf_hdr_nsamples(hdr)
61
66
  hdr[:n][BCF_DT_SAMPLE]
62
67
  end
68
+
69
+ def bcf_update_info_int32(hdr, line, key, values, n)
70
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
71
+ end
72
+
73
+ def bcf_update_info_float(hdr, line, key, values, n)
74
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
75
+ end
76
+
77
+ def bcf_update_info_flag(hdr, line, key, string, n)
78
+ bcf_update_info(hdr, line, key, string, n, BCF_HT_FLAG)
79
+ end
80
+
81
+ def bcf_update_info_string(hdr, line, key, string)
82
+ bcf_update_info(hdr, line, key, string, 1, BCF_HT_STR)
83
+ end
84
+
85
+ def bcf_update_format_int32(hdr, line, key, values, n)
86
+ bcf_update_format(hdr, line, key, values, n,
87
+ BCF_HT_INT)
88
+ end
89
+
90
+ def bcf_update_format_float(hdr, line, key, values, n)
91
+ bcf_update_format(hdr, line, key, values, n,
92
+ BCF_HT_REAL)
93
+ end
94
+
95
+ def bcf_update_format_char(hdr, line, key, values, n)
96
+ bcf_update_format(hdr, line, key, values, n,
97
+ BCF_HT_STR)
98
+ end
99
+
100
+ def bcf_update_genotypes(hdr, line, gts, n)
101
+ bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT)
102
+ end
103
+
104
+ def bcf_gt_phased(idx)
105
+ ((idx + 1) << 1 | 1)
106
+ end
107
+
108
+ def bcf_gt_unphased(idx)
109
+ ((idx + 1) << 1)
110
+ end
111
+
112
+ def bcf_gt_missing
113
+ 0
114
+ end
115
+
116
+ def bcf_gt_is_missing(val)
117
+ ((val) >> 1 ? 0 : 1)
118
+ end
119
+
120
+ def bcf_gt_is_phased(idx)
121
+ ((idx) & 1)
122
+ end
123
+
124
+ def bcf_gt_allele(val)
125
+ (((val) >> 1) - 1)
126
+ end
127
+
128
+ def bcf_alleles2gt(a, b)
129
+ ((a) > (b) ? (a * (a + 1) / 2 + b) : (b * (b + 1) / 2 + a))
130
+ end
131
+
132
+ def bcf_get_info_int32(hdr, line, tag, dst, ndst)
133
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
134
+ end
135
+
136
+ def bcf_get_info_float(hdr, line, tag, dst, ndst)
137
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
138
+ end
139
+
140
+ def bcf_get_info_string(hdr, line, tag, dst, ndst)
141
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
142
+ end
143
+
144
+ def bcf_get_info_flag(hdr, line, tag, dst, ndst)
145
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG)
146
+ end
147
+
148
+ def bcf_get_format_int32(hdr, line, tag, dst, ndst)
149
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
150
+ end
151
+
152
+ def bcf_get_format_float(hdr, line, tag, dst, ndst)
153
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
154
+ end
155
+
156
+ def bcf_get_format_char(hdr, line, tag, dst, ndst)
157
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
158
+ end
159
+
160
+ def bcf_get_genotypes(hdr, line, dst, ndst)
161
+ bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT)
162
+ end
163
+
164
+ def bcf_hdr_int2id(hdr, type, int_id)
165
+ LibHTS::BcfIdpair.new(
166
+ hdr[:id][type].to_ptr +
167
+ LibHTS::BcfIdpair.size * int_id # offsets
168
+ )[:key]
169
+ end
63
170
  end
64
171
 
65
172
  # constants
data/lib/hts/libhts.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "ffi_ext/struct"
4
+
3
5
  module HTS
4
6
  module LibHTS
5
7
  extend FFI::Library
@@ -18,119 +20,9 @@ module HTS
18
20
  end
19
21
  end
20
22
 
21
- module FFI
22
- class Struct
23
- class << self
24
- # @example HtsOpt
25
- # class HtsOpt < FFI::Struct
26
- # layout \
27
- # :arg, :string,
28
- # :opt, HtsFmtOption,
29
- # :val,
30
- # union_layout(
31
- # :i, :int,
32
- # :s, :string
33
- # ),
34
- # :next, HtsOpt.ptr
35
- # end
36
-
37
- def union_layout(*args)
38
- Class.new(FFI::Union) { layout(*args) }
39
- end
40
-
41
- # @example HtsFormat
42
- # class HtsFormat < FFI::Struct
43
- # layout \
44
- # :category, HtsFormatCategory,
45
- # :format, HtsExactFormat,
46
- # :version,
47
- # struct_layout(
48
- # :major, :short,
49
- # :minor, :short
50
- # ),
51
- # :compression, HtsCompression,
52
- # :compression_level, :short,
53
- # :specific, :pointer
54
- # end
55
-
56
- def struct_layout(*args)
57
- Class.new(FFI::Struct) { layout(*args) }
58
- end
59
- end
60
- end
61
-
62
- # Struct that support bit fields.
63
- # Currently readonly.
64
- class BitStruct < Struct
65
- class << self
66
- module BitFieldsModule
67
- def [](name)
68
- bit_fields = self.class.bit_fields_hash_table
69
- parent, start, width = bit_fields[name]
70
- if parent
71
- (super(parent) >> start) & ((1 << width) - 1)
72
- else
73
- super(name)
74
- end
75
- end
76
- end
77
- private_constant :BitFieldsModule
78
-
79
- attr_reader :bit_fields_hash_table
80
-
81
- # @example Bcf1
82
- # class Bcf1 < FFI::BitStruct
83
- # layout \
84
- # :pos, :hts_pos_t,
85
- # :rlen, :hts_pos_t,
86
- # :rid, :int32_t,
87
- # :qual, :float,
88
- # :_n_info_allele, :uint32_t,
89
- # :_n_fmt_sample, :uint32_t,
90
- # :shared, KString,
91
- # :indiv, KString,
92
- # :d, BcfDec,
93
- # :max_unpack, :int,
94
- # :unpacked, :int,
95
- # :unpack_size, [:int, 3],
96
- # :errcode, :int
97
- #
98
- # bit_fields :_n_info_allele,
99
- # :n_info, 16,
100
- # :n_allele, 16
101
- #
102
- # bit_fields :_n_fmt_sample,
103
- # :n_fmt, 8,
104
- # :n_sample, 24
105
- # end
106
-
107
- def bit_fields(*args)
108
- unless instance_variable_defined?(:@bit_fields_hash_table)
109
- @bit_fields_hash_table = {}
110
- prepend BitFieldsModule
111
- end
112
-
113
- parent = args.shift
114
- labels = []
115
- widths = []
116
- args.each_slice(2) do |l, w|
117
- labels << l
118
- widths << w
119
- end
120
- starts = widths.inject([0]) do |result, w|
121
- result << (result.last + w)
122
- end
123
- labels.zip(starts, widths).each do |l, s, w|
124
- @bit_fields_hash_table[l] = [parent, s, w]
125
- end
126
- end
127
- end
128
- end
129
- end
130
-
131
23
  require_relative "libhts/constants"
132
24
 
133
- # alphabetical order
25
+ # This is alphabetical order.
134
26
  require_relative "libhts/bgzf"
135
27
  require_relative "libhts/faidx"
136
28
  require_relative "libhts/hfile"
@@ -3,8 +3,12 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "utils/open_method"
7
+
6
8
  module HTS
7
- class Tbx
9
+ class Tabix
10
+ extend Utils::OpenMethod
11
+
8
12
  def initialize; end
9
13
 
10
14
  def build; end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  module Utils
    # Adds a File.open-like class method to classes that `extend` this module.
    # The class must accept a single path in `new`, and its instances must
    # respond to `close`.
    module OpenMethod
      # Creates an instance for +path+.
      #
      # Without a block the new object is returned and the caller is
      # responsible for closing it. With a block the object is yielded and
      # closed afterwards, even when the block raises, and the block's value
      # is returned (the same convention as File.open).
      #
      # @param path [String]
      # @yield [object] the newly created instance
      # @return [Object] the instance, or the block's value when a block is given
      def open(path)
        object = new(path)
        return object unless block_given?

        begin
          yield(object)
        ensure
          # Runs on normal exit and on exceptions, so the handle never leaks.
          object.close
        end
      end
    end
  end
end
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = "0.0.3"
4
+ VERSION = "0.0.4"
5
5
  end
data/lib/htslib.rb CHANGED
@@ -40,6 +40,6 @@ module HTS
40
40
  end
41
41
 
42
42
  require_relative "hts/bam"
43
- require_relative "hts/fai"
44
- require_relative "hts/tbx"
45
- require_relative "hts/vcf"
43
+ require_relative "hts/faidx"
44
+ require_relative "hts/tabix"
45
+ require_relative "hts/bcf"