htslib 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,15 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Record
6
- def initialize(bcf_t, bcf)
6
+ def initialize(bcf_t, header)
7
7
  @bcf1 = bcf_t
8
- @bcf = bcf
9
- @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
8
+ @header = header
10
9
  end
11
10
 
12
- attr_reader :p1, :bcf
11
+ attr_reader :header
13
12
 
14
13
  def struct
15
14
  @bcf1
@@ -19,36 +18,48 @@ module HTS
19
18
  @bcf1.to_ptr
20
19
  end
21
20
 
22
- # def inspect; end
23
-
24
- def formats; end
21
+ # Get the reference id of the record.
22
+ def rid
23
+ @bcf1[:rid]
24
+ end
25
25
 
26
- def genotypes; end
26
+ def rid=(rid)
27
+ @bcf1[:rid] = rid
28
+ end
27
29
 
30
+ # Get the chromosome of variant.
28
31
  def chrom
29
- hdr = @bcf.header.struct
30
- rid = @bcf1[:rid]
31
-
32
- LibHTS.bcf_hdr_id2name(hdr, rid)
32
+ LibHTS.bcf_hdr_id2name(@header.struct, rid)
33
33
  end
34
34
 
35
+ # Return 0-based position.
35
36
  def pos
36
- @bcf1[:pos] + 1 # FIXME
37
+ @bcf1[:pos]
37
38
  end
38
39
 
39
- def start
40
- @bcf1[:pos]
40
+ def pos=(pos)
41
+ @bcf1[:pos] = pos
41
42
  end
42
43
 
43
- def stop
44
- @bcf1[:pos] + @bcf1[:rlen]
44
+ # Return the 0-based, exclusive end position
45
+ def endpos
46
+ pos + @bcf1[:rlen]
45
47
  end
46
48
 
49
+ # Return the value of the ID column.
47
50
  def id
48
51
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
49
52
  @bcf1[:d][:id]
50
53
  end
51
54
 
55
+ def id=(id)
56
+ LibHTS.bcf_update_id(@header, @bcf1, id)
57
+ end
58
+
59
+ def clear_id
60
+ LibHTS.bcf_update_id(@header, @bcf1, ".")
61
+ end
62
+
52
63
  def filter
53
64
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
54
65
  d = @bcf1[:d]
@@ -59,20 +70,25 @@ module HTS
59
70
  "PASS"
60
71
  when 1
61
72
  i = d[:flt].read_int
62
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
63
- when 2
73
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
74
+ when 2..nil
64
75
  d[:flt].get_array_of_int(0, n_flt).map do |i|
65
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
76
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
66
77
  end
67
78
  else
68
79
  raise "Unexpected number of filters. n_flt: #{n_flt}"
69
80
  end
70
81
  end
71
82
 
83
+ # Get variant quality.
72
84
  def qual
73
85
  @bcf1[:qual]
74
86
  end
75
87
 
88
+ def qual=(qual)
89
+ @bcf1[:qual] = qual
90
+ end
91
+
76
92
  def ref
77
93
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
78
94
  @bcf1[:d][:allele].get_pointer(0).read_string
@@ -92,22 +108,38 @@ module HTS
92
108
  ).map(&:read_string)
93
109
  end
94
110
 
95
- def info
111
+ def info(key = nil)
96
112
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
97
- Info.new(self)
113
+ info = Info.new(self)
114
+ if key
115
+ info.get(key)
116
+ else
117
+ info
118
+ end
98
119
  end
99
120
 
100
- def format
121
+ def format(key = nil)
101
122
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
102
- Format.new(self)
123
+ if key
124
+ Format.new(self).get(key)
125
+ else
126
+ Format.new(self)
127
+ end
103
128
  end
104
129
 
105
130
  def to_s
106
131
  ksr = LibHTS::KString.new
107
- raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
132
+ raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
108
133
 
109
134
  ksr[:s]
110
135
  end
136
+
137
+ private
138
+
139
# Copy hook run on the new object created by #dup / #clone.
#
# Keeps the same header object as the original and stores the result of
# LibHTS.bcf_dup(orig.struct) as this record's underlying struct.
#
# @param orig [Record] the record being copied
def initialize_copy(orig)
  @header = orig.header
  @bcf1 = LibHTS.bcf_dup(orig.struct)
end
111
143
  end
112
144
  end
113
145
  end
data/lib/hts/bcf.rb CHANGED
@@ -1,73 +1,136 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
5
+ require_relative "hts"
6
6
  require_relative "bcf/header"
7
7
  require_relative "bcf/info"
8
8
  require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
- class Bcf
12
+ class Bcf < Hts
13
13
  include Enumerable
14
14
 
15
- attr_reader :file_path, :mode, :header
16
- # HtfFile is FFI::BitStruct
17
- attr_reader :htf_file
15
+ attr_reader :file_name, :index_name, :mode, :header
18
16
 
19
- class << self
20
- alias open new
21
- end
17
+ def self.open(*args, **kw)
18
+ file = new(*args, **kw) # do not yield
19
+ return file unless block_given?
22
20
 
23
- def initialize(file_path, mode = "r")
24
- file_path = File.expand_path(file_path)
21
+ begin
22
+ yield file
23
+ ensure
24
+ file.close
25
+ end
26
+ file
27
+ end
25
28
 
26
- unless File.exist?(file_path)
27
- message = "No such VCF/BCF file - #{file_path}"
29
# Open a VCF/BCF file through LibHTS.hts_open.
#
# In read mode the header is read, an index is optionally built, the index
# is loaded and the current file offset is remembered in @start_position.
# When the mode string starts with "w" the method returns right after the
# handle (and optional thread setting) is configured.
#
# @param file_name [String] local path or remote URI (existence is not checked here)
# @param mode [String] mode string passed to hts_open
# @param index [String, nil] index file name; nil lets load_index pick its default
# @param fai [Object, nil] accepted for interface compatibility; not referenced in this method
# @param threads [Integer, nil] passed to hts_set_threads when greater than 0
# @param create_index [Boolean] build an index before loading it
# @raise [RuntimeError] if a block is given, or if setting the thread count fails
# @raise [Errno::ENOENT] if hts_open returns a null handle
def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
               create_index: false)
  if block_given?
    message = "HTS::Bcf.new() does not take block; Please use HTS::Bcf.open() instead"
    raise message
  end

  # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.

  @file_name  = file_name
  @index_name = index
  @mode       = mode
  @hts_file   = LibHTS.hts_open(@file_name, mode)

  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?

  if threads&.> 0
    r = LibHTS.hts_set_threads(@hts_file, threads)
    raise "Failed to set number of threads: #{threads}" if r < 0
  end

  # Nothing below applies to a file opened for writing.
  return if @mode[0] == "w"

  @header = Bcf::Header.new(@hts_file)

  # Inside this method `create_index` is the keyword flag; the call with an
  # argument list dispatches to the instance method of the same name.
  create_index(index) if create_index

  @idx = load_index(index)

  @start_position = tell
end
60
+
61
# Build an index for the file this object was opened with.
#
# Calls LibHTS.bcf_index_build2 when an index file name is given and
# LibHTS.bcf_index_build otherwise. Both htslib functions take the data
# file name as their first argument, so @file_name is passed rather than
# the open handle.
#
# @param index_name [String, nil] destination index file name
# @return [Integer] the status returned by htslib
def create_index(index_name = nil)
  warn "Create index for #{@file_name} to #{index_name}"
  if index_name
    LibHTS.bcf_index_build2(@file_name, index_name, -1)
  else
    LibHTS.bcf_index_build(@file_name, -1)
  end
end
69
+
70
+ def load_index(index_name = nil)
71
+ if index_name
72
+ LibHTS.bcf_index_load2(@file_name, index_name)
73
+ else
74
+ LibHTS.bcf_index_load3(@file_name, nil, 2)
43
75
  end
44
76
  end
45
77
 
46
- def struct
47
- htf_file
78
# Whether an index handle is available.
#
# @idx is only assigned on the read path of #initialize, so it may still be
# nil (e.g. for a file opened for writing); that counts as "not loaded".
#
# @return [Boolean]
def index_loaded?
  !(@idx.nil? || @idx.null?)
end
81
+
82
# Write the header to the open file.
#
# Replaces @header with a duplicate of the current header, registers the
# file name with htslib via hts_set_fai_filename (which operates on the
# htsFile handle), then writes the header struct with bcf_hdr_write.
#
# @return [Integer] the status returned by LibHTS.bcf_hdr_write
# @raise [IOError] if the file has been closed
def write_header
  raise IOError, "closed stream" if closed?

  @header = header.dup
  LibHTS.hts_set_fai_filename(@hts_file, @file_name)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
49
89
 
50
- def to_ptr
51
- htf_file.to_ptr
90
# Write one record to the open file.
#
# A duplicate of the record (var.dup) is what gets passed to
# LibHTS.bcf_write, as in the original code. htslib's bcf_write reports
# success with 0 and failure with a negative value, so only a negative
# status is treated as an error.
#
# @param var [Record] record to write
# @return [true] when the record was written
# @raise [IOError] if the file has been closed
# @raise [RuntimeError] if htslib reports a write failure
def write(var)
  raise IOError, "closed stream" if closed?

  var_dup = var.dup
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  raise "Failed to write record" if status < 0

  true
end
53
96
 
54
97
  # Closing the file is handled by Hts#close (inherited).
55
- def close
56
- LibHTS.hts_close(htf_file)
98
+
99
+ def nsamples
100
+ header.nsamples
57
101
  end
58
102
 
59
- def each
103
+ def samples
104
+ header.samples
105
+ end
106
+
107
+ # Iterate over each record.
108
+ # Generate a new Record object each time.
109
+ # Slower than each.
110
+ def each_copy
111
+ raise IOError, "closed stream" if closed?
112
+
60
113
  return to_enum(__method__) unless block_given?
61
114
 
62
- while LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init) != -1
63
- record = Record.new(bcf1, self)
115
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
116
+ record = Record.new(bcf1, header)
64
117
  yield record
65
118
  end
66
119
  self
67
120
  end
68
121
 
69
- def sample_count
70
- LibHTS.bcf_hdr_nsamples(header.struct)
122
+ # Iterate over each record.
123
+ # Record object is reused.
124
+ # Faster than each_copy.
125
+ def each
126
+ raise IOError, "closed stream" if closed?
127
+
128
+ return to_enum(__method__) unless block_given?
129
+
130
+ bcf1 = LibHTS.bcf_init
131
+ record = Record.new(bcf1, header)
132
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
133
+ self
71
134
  end
72
135
  end
73
136
  end
data/lib/hts/faidx.rb CHANGED
@@ -1,19 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
6
5
  module HTS
7
6
  class Faidx
8
- attr_reader :file_path
7
+ attr_reader :file_name
9
8
 
10
9
  class << self
11
10
  alias open new
12
11
  end
13
12
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
13
+ def initialize(file_name)
14
+ @file_name = file_name
15
+ @fai = LibHTS.fai_load(@file_name)
17
16
 
18
17
  # IO like API
19
18
  if block_given?
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FFI
4
+ class Pointer
5
+ unless method_defined?(:read_array_of_struct)
6
+ def read_array_of_struct(type, length)
7
+ ary = []
8
+ size = type.size
9
+ tmp = self
10
+ length.times do |j|
11
+ ary << type.new(tmp)
12
+ tmp += size unless j == length - 1 # avoid OOB
13
+ end
14
+ ary
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../htslib"
4
+
5
module HTS
  # Shared behaviour for objects that wrap an htslib file handle kept in
  # @hts_file. Subclasses are expected to assign @hts_file (and, if they
  # want #rewind to work, @start_position).
  class Hts
    # @return the underlying handle object (nil once closed)
    def struct
      @hts_file
    end

    # @return the result of calling to_ptr on the underlying handle
    def to_ptr
      @hts_file.to_ptr
    end

    # @return [String] the :format field of hts_get_format, as a string
    # @raise [IOError] if the file has been closed
    def format
      check_closed
      LibHTS.hts_get_format(@hts_file)[:format].to_s
    end

    # @return [String] "major" when minor is -1, otherwise "major.minor"
    # @raise [IOError] if the file has been closed
    def format_version
      check_closed
      v = LibHTS.hts_get_format(@hts_file)[:version]
      major = v[:major]
      minor = v[:minor]
      if minor == -1
        major.to_s
      else
        "#{major}.#{minor}"
      end
    end

    # Close the handle via hts_close and drop the reference.
    # Calling it again on an already closed object is a no-op.
    def close
      return if closed?

      LibHTS.hts_close(@hts_file)
      @hts_file = nil
    end

    # @return [Boolean] true when there is no handle or the handle is null
    def closed?
      @hts_file.nil? || @hts_file.null?
    end

    # Seek to an absolute offset, dispatching on the handle's
    # is_cram / is_bgzf flags to the matching htslib seek function.
    #
    # @param offset [Integer] offset passed with IO::SEEK_SET
    # @return the value returned by the htslib seek function
    # @raise [IOError] if the file has been closed
    def seek(offset)
      check_closed
      if @hts_file[:is_cram] == 1
        LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET)
      else
        LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
      end
    end

    # Report the current offset; returns nil for CRAM handles.
    #
    # @return [Integer, nil]
    # @raise [IOError] if the file has been closed
    def tell
      check_closed
      if @hts_file[:is_cram] == 1
        # LibHTS.cram_tell(@hts_file[:fp][:cram])
        # warn 'cram_tell is not implemented in c htslib'
        nil
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_tell(@hts_file[:fp][:bgzf])
      else
        LibHTS.htell(@hts_file[:fp][:hfile])
      end
    end

    # Seek back to @start_position and return the resulting offset.
    #
    # @raise [RuntimeError] if no start position was recorded or the seek fails
    # @raise [IOError] if the file has been closed
    def rewind
      if @start_position
        r = seek(@start_position)
        raise "Failed to rewind: #{r}" if r < 0

        tell
      else
        raise "Cannot rewind: no start position"
      end
    end

    private

    # Fail with the same error the subclasses use instead of letting a
    # nil handle surface as NoMethodError.
    def check_closed
      raise IOError, "closed stream" if closed?
    end
  end
end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFILE, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFILE],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFILE, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
+ # Return a virtual file pointer to the current location in the file.
79
+ def self.bgzf_tell(fp)
80
+ (fp[:block_address] << 16) | (fp[:block_offset] & 0xFFFF)
81
+ end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFILE, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFILE, :string],
192
197
  :int
193
198
  end
194
199
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- typedef :pointer, :HFILE
6
5
  typedef :int64, :hts_pos_t
7
6
  typedef :pointer, :bam_plp_auto_f
8
7
 
@@ -25,6 +24,25 @@ module HTS
25
24
  :f, :pointer # kstream_t
26
25
  end
27
26
 
27
+ # HFILE
28
+
29
+ class HFILE < FFI::BitStruct
30
+ layout \
31
+ :buffer, :string,
32
+ :begin, :string,
33
+ :end, :string,
34
+ :limit, :string,
35
+ :backend, :pointer,
36
+ :offset, :size_t,
37
+ :_flags, :uint,
38
+ :has_errno, :int
39
+
40
+ bit_fields :_flags,
41
+ :at_eof, 1,
42
+ :mobile, 1,
43
+ :readonly, 1
44
+ end
45
+
28
46
  # BGZF
29
47
  class BGZF < FFI::BitStruct
30
48
  layout \
@@ -38,7 +56,7 @@ module HTS
38
56
  :uncompressed_block, :pointer,
39
57
  :compressed_block, :pointer,
40
58
  :cache, :pointer,
41
- :fp, :HFILE,
59
+ :fp, HFILE.ptr,
42
60
  :mt, :pointer,
43
61
  :idx, :pointer,
44
62
  :idx_build_otf, :int,
@@ -198,8 +216,8 @@ module HTS
198
216
  :fp,
199
217
  union_layout(
200
218
  :bgzf, BGZF.ptr,
201
- :cram, :pointer,
202
- :hfile, :pointer # HFILE
219
+ :cram, :pointer, # cram_fd
220
+ :hfile, HFILE.ptr
203
221
  ),
204
222
  :state, :pointer,
205
223
  :format, HtsFormat,
@@ -218,7 +236,7 @@ module HTS
218
236
 
219
237
  SamFile = HtsFile
220
238
 
221
- class HtsThreadPool < FFI::Struct
239
+ class HtsTpool < FFI::Struct
222
240
  layout \
223
241
  :pool, :pointer,
224
242
  :qsize, :int
@@ -461,8 +479,8 @@ module HTS
461
479
  :id, :string,
462
480
  :als, :pointer, # (\\0-separated string)
463
481
  :allele, :pointer,
464
- :info, :pointer, # BcfInfo.ptr,
465
- :fmt, BcfFmt.ptr,
482
+ :info, :pointer, # array of BcfInfo.ptr,
483
+ :fmt, :pointer, # array of BcfFmt.ptr,
466
484
  :var, BcfVariant.ptr,
467
485
  :n_var, :int,
468
486
  :var_type, :int,