htslib 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Record
6
- def initialize(bcf_t, bcf)
6
+ def initialize(bcf_t, header)
7
7
  @bcf1 = bcf_t
8
- @bcf = bcf
9
- @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
8
+ @header = header
10
9
  end
11
10
 
12
- attr_reader :p1, :bcf
11
+ attr_reader :header
13
12
 
14
13
  def struct
15
14
  @bcf1
@@ -19,36 +18,48 @@ module HTS
19
18
  @bcf1.to_ptr
20
19
  end
21
20
 
22
- # def inspect; end
23
-
24
- def formats; end
21
+ # Get the reference id of the record.
22
+ def rid
23
+ @bcf1[:rid]
24
+ end
25
25
 
26
- def genotypes; end
26
+ def rid=(rid)
27
+ @bcf1[:rid] = rid
28
+ end
27
29
 
30
+ # Get the chromosome of variant.
28
31
  def chrom
29
- hdr = @bcf.header.struct
30
- rid = @bcf1[:rid]
31
-
32
- LibHTS.bcf_hdr_id2name(hdr, rid)
32
+ LibHTS.bcf_hdr_id2name(@header.struct, rid)
33
33
  end
34
34
 
35
+ # Return 0-based position.
35
36
  def pos
36
- @bcf1[:pos] + 1 # FIXME
37
+ @bcf1[:pos]
37
38
  end
38
39
 
39
- def start
40
- @bcf1[:pos]
40
+ def pos=(pos)
41
+ @bcf1[:pos] = pos
41
42
  end
42
43
 
43
- def stop
44
- @bcf1[:pos] + @bcf1[:rlen]
44
+ # Return the 0-based, exclusive end position
45
+ def endpos
46
+ pos + @bcf1[:rlen]
45
47
  end
46
48
 
49
+ # Return the value of the ID column.
47
50
  def id
48
51
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
49
52
  @bcf1[:d][:id]
50
53
  end
51
54
 
55
+ def id=(id)
56
+ LibHTS.bcf_update_id(@header, @bcf1, id)
57
+ end
58
+
59
+ def clear_id
60
+ LibHTS.bcf_update_id(@header, @bcf1, ".")
61
+ end
62
+
52
63
  def filter
53
64
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
54
65
  d = @bcf1[:d]
@@ -59,20 +70,25 @@ module HTS
59
70
  "PASS"
60
71
  when 1
61
72
  i = d[:flt].read_int
62
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
63
- when 2
73
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
74
+ when 2..nil
64
75
  d[:flt].get_array_of_int(0, n_flt).map do |i|
65
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
76
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
66
77
  end
67
78
  else
68
79
  raise "Unexpected number of filters. n_flt: #{n_flt}"
69
80
  end
70
81
  end
71
82
 
83
+ # Get variant quality.
72
84
  def qual
73
85
  @bcf1[:qual]
74
86
  end
75
87
 
88
+ def qual=(qual)
89
+ @bcf1[:qual] = qual
90
+ end
91
+
76
92
  def ref
77
93
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
78
94
  @bcf1[:d][:allele].get_pointer(0).read_string
@@ -92,22 +108,38 @@ module HTS
92
108
  ).map(&:read_string)
93
109
  end
94
110
 
95
- def info
111
+ def info(key = nil)
96
112
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
97
- Info.new(self)
113
+ info = Info.new(self)
114
+ if key
115
+ info.get(key)
116
+ else
117
+ info
118
+ end
98
119
  end
99
120
 
100
- def format
121
+ def format(key = nil)
101
122
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
102
- Format.new(self)
123
+ if key
124
+ Format.new(self).get(key)
125
+ else
126
+ Format.new(self)
127
+ end
103
128
  end
104
129
 
105
130
  def to_s
106
131
  ksr = LibHTS::KString.new
107
- raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
132
+ raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
108
133
 
109
134
  ksr[:s]
110
135
  end
136
+
137
+ private
138
+
139
# Copy hook that Ruby invokes on the new object for +dup+ / +clone+.
#
# The header reference is shared with the source record, while the
# underlying record struct is duplicated through LibHTS.bcf_dup so the
# copy does not point at the source's struct.
#
# @param orig [Record] the record being copied
def initialize_copy(orig)
  @header = orig.header
  @bcf1 = LibHTS.bcf_dup(orig.struct)
end
111
143
  end
112
144
  end
113
145
  end
data/lib/hts/bcf.rb CHANGED
@@ -1,73 +1,136 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
5
+ require_relative "hts"
6
6
  require_relative "bcf/header"
7
7
  require_relative "bcf/info"
8
8
  require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
- class Bcf
12
+ class Bcf < Hts
13
13
  include Enumerable
14
14
 
15
- attr_reader :file_path, :mode, :header
16
- # HtfFile is FFI::BitStruct
17
- attr_reader :htf_file
15
+ attr_reader :file_name, :index_name, :mode, :header
18
16
 
19
- class << self
20
- alias open new
21
- end
17
+ def self.open(*args, **kw)
18
+ file = new(*args, **kw) # do not yield
19
+ return file unless block_given?
22
20
 
23
- def initialize(file_path, mode = "r")
24
- file_path = File.expand_path(file_path)
21
+ begin
22
+ yield file
23
+ ensure
24
+ file.close
25
+ end
26
+ file
27
+ end
25
28
 
26
- unless File.exist?(file_path)
27
- message = "No such VCF/BCF file - #{file_path}"
29
+ def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
+ create_index: false)
31
+ if block_given?
32
+ message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
28
33
  raise message
29
34
  end
30
35
 
31
- @file_path = file_path
32
- @mode = mode
33
- @htf_file = LibHTS.hts_open(file_path, mode)
34
- @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))
36
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
35
37
 
36
- # IO like API
37
- if block_given?
38
- begin
39
- yield self
40
- ensure
41
- close
42
- end
38
+ @file_name = file_name
39
+ @index_name = index
40
+ @mode = mode
41
+ @hts_file = LibHTS.hts_open(@file_name, mode)
42
+
43
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
44
+
45
+ if threads&.> 0
46
+ r = LibHTS.hts_set_threads(@hts_file, threads)
47
+ raise "Failed to set number of threads: #{threads}" if r < 0
48
+ end
49
+
50
+ return if @mode[0] == "w"
51
+
52
+ @header = Bcf::Header.new(@hts_file)
53
+
54
+ create_index(index) if create_index
55
+
56
+ @idx = load_index(index)
57
+
58
+ @start_position = tell
59
+ end
60
+
61
# Build an index for the file named by @file_name.
#
# @param index_name [String, nil] explicit path for the index file; when
#   omitted, the library chooses the index location itself.
# @return [Integer] the status returned by the htslib index builder
def create_index(index_name = nil)
  warn "Create index for #{@file_name} to #{index_name}"
  # Branch on the parameter: there is no `index` local or method here.
  # htslib's bcf_index_build* functions take the data file *name*, not
  # the opened htsFile handle.
  # NOTE(review): min_shift is left at -1 as in the original — confirm
  # this is a value htslib accepts for CSI index generation.
  if index_name
    LibHTS.bcf_index_build2(@file_name, index_name, -1)
  else
    LibHTS.bcf_index_build(@file_name, -1)
  end
end
69
+
70
# Load the index belonging to @file_name.
#
# With an explicit +index_name+ that index file is loaded; otherwise the
# library is asked to locate the index on its own.
#
# @param index_name [String, nil] path of the index file, if known
# @return whatever the LibHTS index loader returns
def load_index(index_name = nil)
  return LibHTS.bcf_index_load2(@file_name, index_name) if index_name

  # NOTE(review): the flags value 2 looks like htslib's
  # HTS_IDX_SILENT_FAIL — confirm against hts.h.
  LibHTS.bcf_index_load3(@file_name, nil, 2)
end
45
77
 
46
- def struct
47
- htf_file
78
# Whether an index has been loaded for this file.
#
# @idx is only assigned by #initialize when the file is opened for
# reading (write mode returns before the index is loaded), so an unset
# @idx is reported as "not loaded" instead of raising NoMethodError.
#
# @return [Boolean]
def index_loaded?
  !(@idx.nil? || @idx.null?)
end
81
+
82
# Write the current header to the output file.
#
# @raise [IOError] if the file has already been closed
# @return the status returned by LibHTS.bcf_hdr_write
def write_header
  raise IOError, "closed stream" if closed?

  @header = header.dup
  # hts_set_fai_filename operates on the file handle (as bcf_hdr_write
  # below does), not on the header wrapper.
  LibHTS.hts_set_fai_filename(@hts_file, @file_name)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
49
89
 
50
- def to_ptr
51
- htf_file.to_ptr
90
# Write one record to the output file.
#
# A duplicate of +var+ is handed to htslib rather than the caller's object.
#
# @param var [Record] the record to write
# @raise [IOError] if the file has already been closed
# @raise [RuntimeError] if htslib reports a write failure
# @return [Integer] the (non-negative) status returned by bcf_write
def write(var)
  raise IOError, "closed stream" if closed?

  var_dup = var.dup
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  # bcf_write returns 0 on success and a negative value on error, so only
  # negative results are failures.
  raise "Failed to write record" if status < 0

  status
end
53
96
 
54
97
  # Close the current file.
55
- def close
56
- LibHTS.hts_close(htf_file)
98
+
99
+ def nsamples
100
+ header.nsamples
57
101
  end
58
102
 
59
- def each
103
+ def samples
104
+ header.samples
105
+ end
106
+
107
+ # Iterate over each record.
108
+ # Generate a new Record object each time.
109
+ # Slower than each.
110
+ def each_copy
111
+ raise IOError, "closed stream" if closed?
112
+
60
113
  return to_enum(__method__) unless block_given?
61
114
 
62
- while LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init) != -1
63
- record = Record.new(bcf1, self)
115
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
116
+ record = Record.new(bcf1, header)
64
117
  yield record
65
118
  end
66
119
  self
67
120
  end
68
121
 
69
- def sample_count
70
- LibHTS.bcf_hdr_nsamples(header.struct)
122
+ # Iterate over each record.
123
+ # Record object is reused.
124
+ # Faster than each_copy.
125
def each
  raise IOError, "closed stream" if closed?

  return to_enum(__method__) unless block_given?

  # A single record struct and wrapper are allocated once and refilled on
  # every read, so the yielded object is the same instance each time.
  bcf1 = LibHTS.bcf_init
  record = Record.new(bcf1, header)
  # bcf_read returns 0 on success, -1 at end of file and a value below -1
  # on a critical error; stop on EOF and raise on errors instead of
  # yielding a stale record.
  until (status = LibHTS.bcf_read(@hts_file, header, bcf1)) == -1
    raise "Failed to read record (bcf_read returned #{status})" if status < -1

    yield record
  end
  self
end
72
135
  end
73
136
  end
data/lib/hts/faidx.rb CHANGED
@@ -1,19 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
6
5
  module HTS
7
6
  class Faidx
8
- attr_reader :file_path
7
+ attr_reader :file_name
9
8
 
10
9
  class << self
11
10
  alias open new
12
11
  end
13
12
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
13
+ def initialize(file_name)
14
+ @file_name = file_name
15
+ @fai = LibHTS.fai_load(@file_name)
17
16
 
18
17
  # IO like API
19
18
  if block_given?
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module FFI
  class Pointer
    # Only supplied when the FFI library in use does not already offer it.
    unless method_defined?(:read_array_of_struct)
      # Wrap +length+ consecutive structs starting at this pointer.
      #
      # @param type   struct class responding to +size+ and +new(pointer)+
      # @param length [Integer] number of elements to wrap
      # @return [Array] one +type+ instance per element, in memory order
      def read_array_of_struct(type, length)
        stride = type.size
        cursor = self
        length.times.map do |idx|
          element = type.new(cursor)
          # Do not step past the last element (avoids an out-of-bounds pointer).
          cursor += stride if idx + 1 < length
          element
        end
      end
    end
  end
end
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../htslib"
4
+
5
+ module HTS
6
# Common behaviour for objects that keep an opened file handle in @hts_file.
class Hts
  # The wrapped file handle itself.
  def struct
    @hts_file
  end

  # Pointer form of the wrapped handle.
  def to_ptr
    @hts_file.to_ptr
  end

  # Name of the file format reported by the library, as a String.
  def format
    detected = LibHTS.hts_get_format(@hts_file)
    detected[:format].to_s
  end

  # Format version as "major.minor", or just "major" when minor is -1.
  def format_version
    version = LibHTS.hts_get_format(@hts_file)[:version]
    major = version[:major]
    minor = version[:minor]
    return major.to_s if minor == -1

    "#{major}.#{minor}"
  end

  # Close the handle; does nothing when it is already closed.
  def close
    unless closed?
      LibHTS.hts_close(@hts_file)
      @hts_file = nil
    end
  end

  # True when the handle has been dropped or is a null pointer.
  def closed?
    return true if @hts_file.nil?

    @hts_file.null?
  end

  # Move to +offset+ (measured from the start of the stream) using the
  # seek routine matching the kind of stream behind the handle.
  def seek(offset)
    return LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET) if @hts_file[:is_cram] == 1
    return LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET) if @hts_file[:is_bgzf] == 1

    LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
  end

  # Current position in the stream, or nil for CRAM (no position is
  # reported there; the original notes cram_tell is not available).
  def tell
    return nil if @hts_file[:is_cram] == 1
    return LibHTS.bgzf_tell(@hts_file[:fp][:bgzf]) if @hts_file[:is_bgzf] == 1

    LibHTS.htell(@hts_file[:fp][:hfile])
  end

  # Seek back to the position remembered in @start_position and return
  # the position reported afterwards.
  def rewind
    raise "Cannot rewind: no start position" unless @start_position

    status = seek(@start_position)
    raise "Failed to rewind: #{status}" if status < 0

    tell
  end
end
74
+ end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFILE, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFILE],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFILE, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
+ # Return a virtual file pointer to the current location in the file.
79
def self.bgzf_tell(fp)
  # The block address occupies the bits above the low 16; the low 16 bits
  # carry the offset inside that block.
  block_start = fp[:block_address]
  within_block = fp[:block_offset] & 0xFFFF
  (block_start << 16) | within_block
end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFILE, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFILE, :string],
192
197
  :int
193
198
  end
194
199
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- typedef :pointer, :HFILE
6
5
  typedef :int64, :hts_pos_t
7
6
  typedef :pointer, :bam_plp_auto_f
8
7
 
@@ -25,6 +24,25 @@ module HTS
25
24
  :f, :pointer # kstream_t
26
25
  end
27
26
 
27
+ # HFILE
28
+
29
+ class HFILE < FFI::BitStruct
30
+ layout \
31
+ :buffer, :string,
32
+ :begin, :string,
33
+ :end, :string,
34
+ :limit, :string,
35
+ :backend, :pointer,
36
+ :offset, :size_t,
37
+ :_flags, :uint,
38
+ :has_errno, :int
39
+
40
+ bit_fields :_flags,
41
+ :at_eof, 1,
42
+ :mobile, 1,
43
+ :readonly, 1
44
+ end
45
+
28
46
  # BGZF
29
47
  class BGZF < FFI::BitStruct
30
48
  layout \
@@ -38,7 +56,7 @@ module HTS
38
56
  :uncompressed_block, :pointer,
39
57
  :compressed_block, :pointer,
40
58
  :cache, :pointer,
41
- :fp, :HFILE,
59
+ :fp, HFILE.ptr,
42
60
  :mt, :pointer,
43
61
  :idx, :pointer,
44
62
  :idx_build_otf, :int,
@@ -198,8 +216,8 @@ module HTS
198
216
  :fp,
199
217
  union_layout(
200
218
  :bgzf, BGZF.ptr,
201
- :cram, :pointer,
202
- :hfile, :pointer # HFILE
219
+ :cram, :pointer, # cram_fd
220
+ :hfile, HFILE.ptr
203
221
  ),
204
222
  :state, :pointer,
205
223
  :format, HtsFormat,
@@ -218,7 +236,7 @@ module HTS
218
236
 
219
237
  SamFile = HtsFile
220
238
 
221
- class HtsThreadPool < FFI::Struct
239
+ class HtsTpool < FFI::Struct
222
240
  layout \
223
241
  :pool, :pointer,
224
242
  :qsize, :int
@@ -461,8 +479,8 @@ module HTS
461
479
  :id, :string,
462
480
  :als, :pointer, # (\\0-separated string)
463
481
  :allele, :pointer,
464
- :info, :pointer, # BcfInfo.ptr,
465
- :fmt, BcfFmt.ptr,
482
+ :info, :pointer, # array of BcfInfo.ptr,
483
+ :fmt, :pointer, # array of BcfFmt.ptr,
466
484
  :var, BcfVariant.ptr,
467
485
  :n_var, :int,
468
486
  :var_type, :int,