htslib 0.0.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
+ # A class for working with VCF records.
5
6
  class Record
6
7
  def initialize(bcf_t, header)
7
8
  @bcf1 = bcf_t
@@ -18,57 +19,46 @@ module HTS
18
19
  @bcf1.to_ptr
19
20
  end
20
21
 
21
- # def inspect; end
22
-
23
- def formats; end
22
+ # Get the reference id of the record.
23
+ def rid
24
+ @bcf1[:rid]
25
+ end
24
26
 
25
- def genotypes; end
27
+ def rid=(rid)
28
+ @bcf1[:rid] = rid
29
+ end
26
30
 
31
+ # Get the chromosome of variant.
27
32
  def chrom
28
- rid = @bcf1[:rid]
29
-
30
33
  LibHTS.bcf_hdr_id2name(@header.struct, rid)
31
34
  end
32
35
 
36
+ # Return 0-based position.
33
37
  def pos
34
- @bcf1[:pos] + 1 # FIXME
38
+ @bcf1[:pos]
35
39
  end
36
40
 
37
- def start
38
- @bcf1[:pos]
41
+ def pos=(pos)
42
+ @bcf1[:pos] = pos
39
43
  end
40
44
 
41
- def stop
42
- @bcf1[:pos] + @bcf1[:rlen]
45
+ # Return the 0-based, exclusive end position
46
+ def endpos
47
+ pos + @bcf1[:rlen]
43
48
  end
44
49
 
50
+ # Return the value of the ID column.
45
51
  def id
46
52
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
47
53
  @bcf1[:d][:id]
48
54
  end
49
55
 
50
- def filter
51
- LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
52
- d = @bcf1[:d]
53
- n_flt = d[:n_flt]
54
-
55
- case n_flt
56
- when 0
57
- "PASS"
58
- when 1
59
- i = d[:flt].read_int
60
- LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
61
- when 2
62
- d[:flt].get_array_of_int(0, n_flt).map do |i|
63
- LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
64
- end
65
- else
66
- raise "Unexpected number of filters. n_flt: #{n_flt}"
67
- end
56
+ def id=(id)
57
+ LibHTS.bcf_update_id(@header, @bcf1, id)
68
58
  end
69
59
 
70
- def qual
71
- @bcf1[:qual]
60
+ def clear_id
61
+ LibHTS.bcf_update_id(@header, @bcf1, ".")
72
62
  end
73
63
 
74
64
  def ref
@@ -90,14 +80,52 @@ module HTS
90
80
  ).map(&:read_string)
91
81
  end
92
82
 
93
- def info
83
+ # Get variant quality.
84
+ def qual
85
+ @bcf1[:qual]
86
+ end
87
+
88
+ def qual=(qual)
89
+ @bcf1[:qual] = qual
90
+ end
91
+
92
# Return the FILTER value of this record.
#
# Unpacks the record up to the filter section, then resolves each numeric
# filter id to its name through the header dictionary.
#
# @return [String] "PASS" when there are no filters, or the single filter name
# @return [Array<String>] the filter names when there are two or more
# @raise [RuntimeError] when n_flt is not 0, 1 or >= 2 (e.g. negative)
def filter
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
  unpacked = @bcf1[:d]
  count = unpacked[:n_flt]
  name_of = ->(flt_id) { LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, flt_id) }

  return "PASS" if count == 0
  return name_of.call(unpacked[:flt].read_int) if count == 1
  raise "Unexpected number of filters. n_flt: #{count}" unless count >= 2

  unpacked[:flt].get_array_of_int(0, count).map(&name_of)
end
111
+
112
+ def info(key = nil)
94
113
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
95
- Info.new(self)
114
+ info = Info.new(self)
115
+ if key
116
+ info.get(key)
117
+ else
118
+ info
119
+ end
96
120
  end
97
121
 
98
- def format
122
+ def format(key = nil)
99
123
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
100
- Format.new(self)
124
+ if key
125
+ Format.new(self).get(key)
126
+ else
127
+ Format.new(self)
128
+ end
101
129
  end
102
130
 
103
131
  def to_s
@@ -109,8 +137,9 @@ module HTS
109
137
 
110
138
  private
111
139
 
112
- def initialize_copy
113
- raise "Not implemented"
140
# Copy hook invoked by Ruby for #dup / #clone.
#
# The copy keeps the header object of +orig+ and receives its own record,
# obtained by passing +orig.struct+ to LibHTS.bcf_dup.
#
# @param orig the record being copied; must respond to #header and #struct
def initialize_copy(orig)
  @header = orig.header
  @bcf1 = LibHTS.bcf_dup(orig.struct)
end
115
144
  end
116
145
  end
data/lib/hts/bcf.rb CHANGED
@@ -1,21 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
5
+ require_relative "hts"
6
6
  require_relative "bcf/header"
7
7
  require_relative "bcf/info"
8
8
  require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
- class Bcf
12
+ # A class for working with VCF, BCF files.
13
+ class Bcf < Hts
13
14
  include Enumerable
14
15
 
15
- attr_reader :file_path, :mode, :header
16
+ attr_reader :file_name, :index_name, :mode, :header
16
17
 
17
- def self.open(...)
18
- file = new(...)
18
+ def self.open(*args, **kw)
19
+ file = new(*args, **kw) # do not yield
19
20
  return file unless block_given?
20
21
 
21
22
  begin
@@ -26,67 +27,100 @@ module HTS
26
27
  file
27
28
  end
28
29
 
29
- def initialize(filename, mode = "r", threads: nil)
30
- raise "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead" if block_given?
31
-
32
- @file_path = filename == "-" ? "-" : File.expand_path(filename)
33
-
34
- if mode[0] == "r" && !File.exist?(file_path)
35
- message = "No such VCF/BCF file - #{file_path}"
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
31
+ create_index: false)
32
+ if block_given?
33
+ message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
36
34
  raise message
37
35
  end
38
36
 
39
- @mode = mode
40
- @hts_file = LibHTS.hts_open(file_path, mode)
37
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
41
38
 
42
- if threads&.> 0
43
- r = LibHTS.hts_set_threads(@hts_file, threads)
44
- raise "Failed to set number of threads: #{threads}" if r < 0
45
- end
39
+ @file_name = file_name
40
+ @index_name = index
41
+ @mode = mode
42
+ @hts_file = LibHTS.hts_open(@file_name, mode)
43
+
44
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
45
+
46
+ set_threads(threads) if threads
46
47
 
47
- return if mode[0] == "w"
48
+ return if @mode[0] == "w"
48
49
 
49
50
  @header = Bcf::Header.new(@hts_file)
51
+ create_index(index) if create_index
52
+ @idx = load_index(index)
53
+ @start_position = tell
54
+ super # do nothing
50
55
  end
51
56
 
52
- def struct
53
- @hts_file
57
# Build an index for the file named by @file_name.
#
# A notice is always written with Kernel#warn first. When +index_name+ is
# given the index is built with bcf_index_build2 (explicit index file name),
# otherwise with bcf_index_build.
#
# @param index_name [String, nil] index file name, or nil to let htslib choose
# @return the value returned by the LibHTS build function
# @raise [IOError] (via check_closed) when the file is closed
def create_index(index_name = nil)
  check_closed

  warn "Create index for #{@file_name} to #{index_name}"
  return LibHTS.bcf_index_build(@file_name, -1) unless index_name

  LibHTS.bcf_index_build2(@file_name, index_name, -1)
end
55
67
 
56
- def to_ptr
57
- @hts_file.to_ptr
68
# Load the index that belongs to @file_name.
#
# With +index_name+ the index is loaded through bcf_index_load2; without it
# bcf_index_load3 is called with a nil index name and flags value 2.
# NOTE(review): 2 is presumably htslib's HTS_IDX_SILENT_FAIL - confirm.
#
# @param index_name [String, nil] explicit index file name
# @return the value returned by the LibHTS load function
# @raise [IOError] (via check_closed) when the file is closed
def load_index(index_name = nil)
  check_closed
  return LibHTS.bcf_index_load3(@file_name, nil, 2) unless index_name

  LibHTS.bcf_index_load2(@file_name, index_name)
end
77
+
78
+ def index_loaded?
79
+ check_closed
80
+
81
+ !@idx.null?
58
82
  end
59
83
 
60
84
  def write_header
85
+ check_closed
86
+
61
87
  @header = header.dup
62
- LibHTS.hts_set_fai_filename(header, @file_path)
63
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
88
+ LibHTS.hts_set_fai_filename(header, @file_name)
89
+ LibHTS.bcf_hdr_write(@hts_file, header)
64
90
  end
65
91
 
66
92
  def write(var)
67
- var_dup = var.dup = var.dup
93
+ check_closed
94
+
95
+ var_dup = var.dup
68
96
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
69
97
  end
70
98
 
71
99
  # Close the current file.
72
- def close
73
- LibHTS.hts_close(@hts_file)
74
- @hts_file = nil
75
- end
76
100
 
77
- def closed?
78
- @hts_file.nil?
101
+ def nsamples
102
+ check_closed
103
+
104
+ header.nsamples
79
105
  end
80
106
 
81
- def sample_count
82
- header.sample_count
107
+ def samples
108
+ check_closed
109
+
110
+ header.samples
83
111
  end
84
112
 
85
- def sample_names
86
- header.sample_names
113
# Iterate over the records of the file.
#
# @param copy [Boolean] when truthy, delegate to each_record_copy; otherwise
#   delegate to each_record_reuse
# @return whatever the selected iterator returns
def each(copy: false, &block)
  return each_record_copy(&block) if copy

  each_record_reuse(&block)
end
88
120
 
89
- def each
121
+ private def each_record_copy
122
+ check_closed
123
+
90
124
  return to_enum(__method__) unless block_given?
91
125
 
92
126
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
@@ -95,5 +129,88 @@ module HTS
95
129
  end
96
130
  self
97
131
  end
132
+
133
# Yield every remaining record, reusing one Record object.
#
# A single bcf1 buffer is allocated and wrapped once; bcf_read refills it
# before each yield, so the block receives the same Record instance every
# time. Reading continues from the current file position rather than from
# the start of the file, as IO objects in Ruby usually do.
#
# @return [Enumerator] when no block is given
# @return [self] after bcf_read returns -1
private def each_record_reuse
  check_closed
  return to_enum(__method__) unless block_given?

  buffer = LibHTS.bcf_init
  shared = Record.new(buffer, header)
  until LibHTS.bcf_read(@hts_file, header, buffer) == -1
    yield shared
  end
  self
end
144
+
145
+ # @!macro [attach] define_getter
146
+ # @method $1
147
+ # Get $1 array
148
+ # @return [Array] the $1 array
149
+ define_getter :chrom
150
+ define_getter :pos
151
+ define_getter :endpos
152
+ define_getter :id
153
+ define_getter :ref
154
+ define_getter :alt
155
+ define_getter :qual
156
+ define_getter :filter
157
+
158
# Collect the value of one INFO field from every remaining record.
#
# The offset reported by #tell is recorded before iterating and restored
# with #seek afterwards.
#
# @param key INFO key handed to each record's #info
# @return [Array] one entry per record
# @raise [NotImplementedError] when +key+ is nil or false; collecting whole
#   Info objects is not supported
def info(key = nil)
  check_closed
  saved_offset = tell
  raise NotImplementedError unless key

  values = map { |record| record.info(key) }
  seek(saved_offset)
  values
end
171
+
172
# Collect the value of one FORMAT field from every remaining record.
#
# The offset reported by #tell is recorded before iterating and restored
# with #seek afterwards.
#
# @param key FORMAT key handed to each record's #format
# @return [Array] one entry per record
# @raise [NotImplementedError] when +key+ is nil or false; collecting whole
#   Format objects is not supported
def format(key = nil)
  check_closed
  saved_offset = tell
  raise NotImplementedError unless key

  values = map { |record| record.format(key) }
  seek(saved_offset)
  values
end
185
+
186
+ # @!macro [attach] define_iterator
187
+ # @method each_$1
188
+ # Get $1 iterator
189
+ define_iterator :chrom
190
+ define_iterator :pos
191
+ define_iterator :endpos
192
+ define_iterator :id
193
+ define_iterator :ref
194
+ define_iterator :alt
195
+ define_iterator :qual
196
+ define_iterator :filter
197
+
198
# Yield the value of one INFO field for every record produced by #each.
#
# @param key INFO key handed to each record's #info
# @yield the result of +record.info(key)+
# @return [Enumerator] when no block is given
# @raise [IOError] (via check_closed) when the file is closed
def each_info(key)
  check_closed
  # This method takes no &block parameter, so there is no local named
  # `block`; use block_given?. The key is forwarded so the enumerator can
  # call this method again with the same argument.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.info(key)
  end
end
206
+
207
# Yield the value of one FORMAT field for every record produced by #each.
#
# @param key FORMAT key handed to each record's #format
# @yield the result of +record.format(key)+
# @return [Enumerator] when no block is given
# @raise [IOError] (via check_closed) when the file is closed
def each_format(key)
  check_closed
  # This method takes no &block parameter, so there is no local named
  # `block`; use block_given?. The key is forwarded so the enumerator can
  # call this method again with the same argument.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.format(key)
  end
end
98
215
  end
99
216
  end
data/lib/hts/faidx.rb CHANGED
@@ -1,28 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
6
5
  module HTS
7
6
  class Faidx
8
- attr_reader :file_path
7
+ attr_reader :file_name
9
8
 
10
- class << self
11
- alias open new
12
- end
9
+ def self.open(*args, **kw)
10
+ file = new(*args, **kw) # do not yield
11
+ return file unless block_given?
13
12
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
13
+ begin
14
+ yield file
15
+ ensure
16
+ file.close
17
+ end
18
+ file
19
+ end
17
20
 
18
- # IO like API
21
+ def initialize(file_name)
19
22
  if block_given?
20
- begin
21
- yield self
22
- ensure
23
- close
24
- end
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
25
  end
26
+
27
+ @file_name = file_name
28
+ @fai = LibHTS.fai_load(@file_name)
29
+
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
26
35
  end
27
36
 
28
37
  def close
@@ -30,10 +39,10 @@ module HTS
30
39
  end
31
40
 
32
41
  # the number of sequences in the index.
33
- def size
42
+ def length
34
43
  LibHTS.faidx_nseq(@fai)
35
44
  end
36
- alias length size
45
+ alias size length
37
46
 
38
47
  # return the length of the requested chromosome.
39
48
  def chrom_size(chrom)
@@ -49,10 +58,10 @@ module HTS
49
58
  alias chrom_length chrom_size
50
59
 
51
60
  # FIXME: naming and syntax
52
- def cget; end
61
+ # def cget; end
53
62
 
54
63
  # FIXME: naming and syntax
55
- def get; end
64
+ # def get; end
56
65
 
57
66
  # __iter__
58
67
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module FFI
  class Pointer
    # Only supplied when the loaded ffi does not already define it.
    unless method_defined?(:read_array_of_struct)
      # Wrap +length+ consecutive structs starting at this pointer.
      #
      # @param type a struct class responding to .size and .new(pointer)
      # @param length [Integer] number of elements to wrap
      # @return [Array] +length+ instances of +type+, spaced +type.size+ apart
      def read_array_of_struct(type, length)
        stride = type.size
        cursor = self
        length.times.map do |index|
          element = type.new(cursor)
          # Do not step past the final element (avoids an out-of-bounds pointer).
          cursor += stride if index != length - 1
          element
        end
      end
    end
  end
end
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../htslib"
4
+
5
module HTS
  # A base class for hts files.
  #
  # Every instance method works on @hts_file, which this class never assigns.
  # NOTE(review): subclasses are presumably expected to set @hts_file (and
  # @start_position, used by #rewind) in their own initializer - confirm.
  class Hts
    class << self
      private

      # Define an instance method +name+ that returns +map(&name)+ over the
      # receiver, then seeks back to the offset that #tell reported before
      # iterating. The receiver must provide #map; it is not defined here.
      def define_getter(name)
        define_method(name) do
          check_closed
          position = tell
          ary = map(&name)
          seek(position)
          ary
        end
      end

      # Define +each_<name>+, which calls the block with
      # +record.public_send(name)+ for every record yielded by #each and then
      # returns self. Without a block an Enumerator is returned.
      def define_iterator(name)
        define_method("each_#{name}") do |&block|
          check_closed
          return to_enum(__method__) unless block

          each do |record|
            block.call(record.public_send(name))
          end
          self
        end
      end
    end

    # Accepts and ignores any positional arguments.
    def initialize(*args)
      # do nothing
    end

    # @return the underlying htsFile handle (@hts_file)
    def struct
      @hts_file
    end

    # @return the result of +to_ptr+ on the underlying handle
    def to_ptr
      @hts_file.to_ptr
    end

    # @return [String] the :format field reported by hts_get_format
    def file_format
      LibHTS.hts_get_format(@hts_file)[:format].to_s
    end

    # @return [String] "major.minor", or just "major" when minor is -1
    def file_format_version
      v = LibHTS.hts_get_format(@hts_file)[:version]
      major = v[:major]
      minor = v[:minor]
      if minor == -1
        major.to_s
      else
        "#{major}.#{minor}"
      end
    end

    # Close the file. Does nothing when it is already closed.
    def close
      return if closed?

      LibHTS.hts_close(@hts_file)
      @hts_file = nil
    end

    # @return [Boolean] true when the handle is nil or a null pointer
    def closed?
      @hts_file.nil? || @hts_file.null?
    end

    # Pass a thread count to hts_set_threads.
    #
    # @param n [Integer] thread count; values <= 0 are ignored
    # @return [self]
    # @raise [TypeError] when +n+ is not an Integer
    # @raise [RuntimeError] when hts_set_threads returns a negative value
    def set_threads(n)
      raise TypeError unless n.is_a?(Integer)

      if n > 0
        r = LibHTS.hts_set_threads(@hts_file, n)
        raise "Failed to set number of threads: #{n}" if r < 0
      end
      self
    end

    # Seek to +offset+ (IO::SEEK_SET), choosing the seek function from the
    # is_cram / is_bgzf flags of the handle.
    #
    # @return the value returned by the selected LibHTS seek function
    def seek(offset)
      if @hts_file[:is_cram] == 1
        LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET)
      else
        LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
      end
    end

    # Report the current offset.
    #
    # @return [nil] for CRAM handles, where no position is reported
    # @return the value of bgzf_tell or htell otherwise
    def tell
      if @hts_file[:is_cram] == 1
        # LibHTS.cram_tell(@hts_file[:fp][:cram])
        # warn 'cram_tell is not implemented in c htslib'
        nil
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_tell(@hts_file[:fp][:bgzf])
      else
        LibHTS.htell(@hts_file[:fp][:hfile])
      end
    end

    # Seek back to @start_position.
    #
    # @return the offset reported by #tell after seeking
    # @raise [RuntimeError] when the seek result is negative, or when no
    #   start position was recorded
    def rewind
      if @start_position
        r = seek(@start_position)
        raise "Failed to rewind: #{r}" if r < 0

        tell
      else
        raise "Cannot rewind: no start position"
      end
    end

    private

    # @raise [IOError] when the file is closed
    def check_closed
      raise IOError, "closed stream" if closed?
    end
  end
end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
# Return a virtual file pointer to the current location in the file:
# block_address shifted left by 16 bits, combined with the low 16 bits of
# block_offset.
def self.bgzf_tell(fp)
  block_start = fp[:block_address] << 16
  within_block = fp[:block_offset] & 0xFFFF
  block_start | within_block
end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFile, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFile, :string],
192
197
  :int
193
198
  end
194
199
  end