htslib 0.0.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
+ # A class for working with VCF records.
5
6
  class Record
6
7
  def initialize(bcf_t, header)
7
8
  @bcf1 = bcf_t
@@ -18,57 +19,46 @@ module HTS
18
19
  @bcf1.to_ptr
19
20
  end
20
21
 
21
- # def inspect; end
22
-
23
- def formats; end
22
+ # Get the reference id of the record.
23
+ def rid
24
+ @bcf1[:rid]
25
+ end
24
26
 
25
- def genotypes; end
27
+ def rid=(rid)
28
+ @bcf1[:rid] = rid
29
+ end
26
30
 
31
+ # Get the chromosome of variant.
27
32
  def chrom
28
- rid = @bcf1[:rid]
29
-
30
33
  LibHTS.bcf_hdr_id2name(@header.struct, rid)
31
34
  end
32
35
 
36
+ # Return 0-based position.
33
37
  def pos
34
- @bcf1[:pos] + 1 # FIXME
38
+ @bcf1[:pos]
35
39
  end
36
40
 
37
- def start
38
- @bcf1[:pos]
41
+ def pos=(pos)
42
+ @bcf1[:pos] = pos
39
43
  end
40
44
 
41
- def stop
42
- @bcf1[:pos] + @bcf1[:rlen]
45
+ # Return the 0-based, exclusive end position
46
+ def endpos
47
+ pos + @bcf1[:rlen]
43
48
  end
44
49
 
50
+ # Return the value of the ID column.
45
51
  def id
46
52
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
47
53
  @bcf1[:d][:id]
48
54
  end
49
55
 
50
- def filter
51
- LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
52
- d = @bcf1[:d]
53
- n_flt = d[:n_flt]
54
-
55
- case n_flt
56
- when 0
57
- "PASS"
58
- when 1
59
- i = d[:flt].read_int
60
- LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
61
- when 2
62
- d[:flt].get_array_of_int(0, n_flt).map do |i|
63
- LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
64
- end
65
- else
66
- raise "Unexpected number of filters. n_flt: #{n_flt}"
67
- end
56
+ def id=(id)
57
+ LibHTS.bcf_update_id(@header, @bcf1, id)
68
58
  end
69
59
 
70
- def qual
71
- @bcf1[:qual]
60
+ def clear_id
61
+ LibHTS.bcf_update_id(@header, @bcf1, ".")
72
62
  end
73
63
 
74
64
  def ref
@@ -90,14 +80,52 @@ module HTS
90
80
  ).map(&:read_string)
91
81
  end
92
82
 
93
- def info
83
+ # Get variant quality.
84
+ def qual
85
+ @bcf1[:qual]
86
+ end
87
+
88
+ def qual=(qual)
89
+ @bcf1[:qual] = qual
90
+ end
91
+
92
# Return the FILTER value of this record.
#
# Unpacks the record up to the FILTER column, then translates the stored
# filter ids through the header dictionary.
#
# @return [String, Array<String>] "PASS" when no filter is set, the single
#   value returned by bcf_hdr_int2id when exactly one filter is set, or an
#   Array of such values when two or more are set.
# @raise [RuntimeError] when the filter count is none of the above.
def filter
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
  unpacked = @bcf1[:d]
  count = unpacked[:n_flt]

  # Shared header-dictionary lookup for the one-filter and many-filter cases.
  name_of = ->(flt_id) { LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, flt_id) }

  return "PASS" if count == 0
  return name_of.call(unpacked[:flt].read_int) if count == 1

  raise "Unexpected number of filters. n_flt: #{count}" unless (2..nil).cover?(count)

  unpacked[:flt].get_array_of_int(0, count).map { |flt_id| name_of.call(flt_id) }
end
111
+
112
+ def info(key = nil)
94
113
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
95
- Info.new(self)
114
+ info = Info.new(self)
115
+ if key
116
+ info.get(key)
117
+ else
118
+ info
119
+ end
96
120
  end
97
121
 
98
- def format
122
+ def format(key = nil)
99
123
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
100
- Format.new(self)
124
+ if key
125
+ Format.new(self).get(key)
126
+ else
127
+ Format.new(self)
128
+ end
101
129
  end
102
130
 
103
131
  def to_s
@@ -109,8 +137,9 @@ module HTS
109
137
 
110
138
  private
111
139
 
112
- def initialize_copy
113
- raise "Not implemented"
140
# Copy hook run by #dup / #clone on the freshly allocated copy.
#
# Shares the header object with the original and replaces the record
# struct with the result of bcf_dup on the original's struct.
#
# @param orig [Record] the record being copied; must respond to
#   #header and #struct.
def initialize_copy(orig)
  @header = orig.header
  @bcf1 = LibHTS.bcf_dup(orig.struct)
end
115
144
  end
116
145
  end
data/lib/hts/bcf.rb CHANGED
@@ -1,21 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
5
+ require_relative "hts"
6
6
  require_relative "bcf/header"
7
7
  require_relative "bcf/info"
8
8
  require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
- class Bcf
12
+ # A class for working with VCF, BCF files.
13
+ class Bcf < Hts
13
14
  include Enumerable
14
15
 
15
- attr_reader :file_path, :mode, :header
16
+ attr_reader :file_name, :index_name, :mode, :header
16
17
 
17
- def self.open(...)
18
- file = new(...)
18
+ def self.open(*args, **kw)
19
+ file = new(*args, **kw) # do not yield
19
20
  return file unless block_given?
20
21
 
21
22
  begin
@@ -26,67 +27,100 @@ module HTS
26
27
  file
27
28
  end
28
29
 
29
- def initialize(filename, mode = "r", threads: nil)
30
- raise "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead" if block_given?
31
-
32
- @file_path = filename == "-" ? "-" : File.expand_path(filename)
33
-
34
- if mode[0] == "r" && !File.exist?(file_path)
35
- message = "No such VCF/BCF file - #{file_path}"
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
31
+ create_index: false)
32
+ if block_given?
33
+ message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
36
34
  raise message
37
35
  end
38
36
 
39
- @mode = mode
40
- @hts_file = LibHTS.hts_open(file_path, mode)
37
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
41
38
 
42
- if threads&.> 0
43
- r = LibHTS.hts_set_threads(@hts_file, threads)
44
- raise "Failed to set number of threads: #{threads}" if r < 0
45
- end
39
+ @file_name = file_name
40
+ @index_name = index
41
+ @mode = mode
42
+ @hts_file = LibHTS.hts_open(@file_name, mode)
43
+
44
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
45
+
46
+ set_threads(threads) if threads
46
47
 
47
- return if mode[0] == "w"
48
+ return if @mode[0] == "w"
48
49
 
49
50
  @header = Bcf::Header.new(@hts_file)
51
+ create_index(index) if create_index
52
+ @idx = load_index(index)
53
+ @start_position = tell
54
+ super # do nothing
50
55
  end
51
56
 
52
- def struct
53
- @hts_file
57
+ def create_index(index_name = nil)
58
+ check_closed
59
+
60
+ warn "Create index for #{@file_name} to #{index_name}"
61
+ if index_name
62
+ LibHTS.bcf_index_build2(@file_name, index_name, -1)
63
+ else
64
+ LibHTS.bcf_index_build(@file_name, -1)
65
+ end
54
66
  end
55
67
 
56
- def to_ptr
57
- @hts_file.to_ptr
68
+ def load_index(index_name = nil)
69
+ check_closed
70
+
71
+ if index_name
72
+ LibHTS.bcf_index_load2(@file_name, index_name)
73
+ else
74
+ LibHTS.bcf_index_load3(@file_name, nil, 2)
75
+ end
76
+ end
77
+
78
+ def index_loaded?
79
+ check_closed
80
+
81
+ !@idx.null?
58
82
  end
59
83
 
60
84
  def write_header
85
+ check_closed
86
+
61
87
  @header = header.dup
62
- LibHTS.hts_set_fai_filename(header, @file_path)
63
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
88
+ LibHTS.hts_set_fai_filename(header, @file_name)
89
+ LibHTS.bcf_hdr_write(@hts_file, header)
64
90
  end
65
91
 
66
92
  def write(var)
67
- var_dup = var.dup = var.dup
93
+ check_closed
94
+
95
+ var_dup = var.dup
68
96
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
69
97
  end
70
98
 
71
99
  # Close the current file.
72
- def close
73
- LibHTS.hts_close(@hts_file)
74
- @hts_file = nil
75
- end
76
100
 
77
- def closed?
78
- @hts_file.nil?
101
+ def nsamples
102
+ check_closed
103
+
104
+ header.nsamples
79
105
  end
80
106
 
81
- def sample_count
82
- header.sample_count
107
+ def samples
108
+ check_closed
109
+
110
+ header.samples
83
111
  end
84
112
 
85
- def sample_names
86
- header.sample_names
113
+ def each(copy: false, &block)
114
+ if copy
115
+ each_record_copy(&block)
116
+ else
117
+ each_record_reuse(&block)
118
+ end
87
119
  end
88
120
 
89
- def each
121
+ private def each_record_copy
122
+ check_closed
123
+
90
124
  return to_enum(__method__) unless block_given?
91
125
 
92
126
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
@@ -95,5 +129,88 @@ module HTS
95
129
  end
96
130
  self
97
131
  end
132
+
133
+ private def each_record_reuse
134
+ check_closed
135
+ # Each does not always start at the beginning of the file.
136
+ # This is the common behavior of IO objects in Ruby.
137
+ return to_enum(__method__) unless block_given?
138
+
139
+ bcf1 = LibHTS.bcf_init
140
+ record = Record.new(bcf1, header)
141
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
142
+ self
143
+ end
144
+
145
+ # @!macro [attach] define_getter
146
+ # @method $1
147
+ # Get $1 array
148
+ # @return [Array] the $1 array
149
+ define_getter :chrom
150
+ define_getter :pos
151
+ define_getter :endpos
152
+ define_getter :id
153
+ define_getter :ref
154
+ define_getter :alt
155
+ define_getter :qual
156
+ define_getter :filter
157
+
158
+ def info(key = nil)
159
+ check_closed
160
+ position = tell
161
+ if key
162
+ ary = map { |r| r.info(key) }
163
+ else
164
+ raise NotImplementedError
165
+ # ary = each_copy.map { |r| r.info }
166
+ # ary = map { |r| r.info.clone }
167
+ end
168
+ seek(position)
169
+ ary
170
+ end
171
+
172
+ def format(key = nil)
173
+ check_closed
174
+ position = tell
175
+ if key
176
+ ary = map { |r| r.format(key) }
177
+ else
178
+ raise NotImplementedError
179
+ # ary = each_copy.map { |r| r.format }
180
+ # ary = map { |r| r.format.clone }
181
+ end
182
+ seek(position)
183
+ ary
184
+ end
185
+
186
+ # @!macro [attach] define_iterator
187
+ # @method each_$1
188
+ # Get $1 iterator
189
+ define_iterator :chrom
190
+ define_iterator :pos
191
+ define_iterator :endpos
192
+ define_iterator :id
193
+ define_iterator :ref
194
+ define_iterator :alt
195
+ define_iterator :qual
196
+ define_iterator :filter
197
+
198
# Iterate over one INFO field across records.
#
# Calls #each and yields +record.info(key)+ for every record it produces.
# Without a block, returns an Enumerator that remembers +key+.
#
# @param key the INFO field identifier, forwarded unchanged to each
#   record's #info.
# @yield [value] the value returned by +record.info(key)+.
# @return [Enumerator] when no block is given; otherwise whatever #each
#   returns.
# @raise [IOError] via check_closed when the file is closed.
def each_info(key)
  check_closed
  # The method takes no &block parameter, so test with block_given?;
  # +key+ must be handed to the enumerator or it would re-enter with no args.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.info(key)
  end
end
206
+
207
# Iterate over one FORMAT field across records.
#
# Calls #each and yields +record.format(key)+ for every record it produces.
# Without a block, returns an Enumerator that remembers +key+.
#
# @param key the FORMAT field identifier, forwarded unchanged to each
#   record's #format.
# @yield [value] the value returned by +record.format(key)+.
# @return [Enumerator] when no block is given; otherwise whatever #each
#   returns.
# @raise [IOError] via check_closed when the file is closed.
def each_format(key)
  check_closed
  # The method takes no &block parameter, so test with block_given?;
  # +key+ must be handed to the enumerator or it would re-enter with no args.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.format(key)
  end
end
98
215
  end
99
216
  end
data/lib/hts/faidx.rb CHANGED
@@ -1,28 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
6
5
  module HTS
7
6
  class Faidx
8
- attr_reader :file_path
7
+ attr_reader :file_name
9
8
 
10
- class << self
11
- alias open new
12
- end
9
+ def self.open(*args, **kw)
10
+ file = new(*args, **kw) # do not yield
11
+ return file unless block_given?
13
12
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
13
+ begin
14
+ yield file
15
+ ensure
16
+ file.close
17
+ end
18
+ file
19
+ end
17
20
 
18
- # IO like API
21
+ def initialize(file_name)
19
22
  if block_given?
20
- begin
21
- yield self
22
- ensure
23
- close
24
- end
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
25
  end
26
+
27
+ @file_name = file_name
28
+ @fai = LibHTS.fai_load(@file_name)
29
+
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
26
35
  end
27
36
 
28
37
  def close
@@ -30,10 +39,10 @@ module HTS
30
39
  end
31
40
 
32
41
  # the number of sequences in the index.
33
- def size
42
+ def length
34
43
  LibHTS.faidx_nseq(@fai)
35
44
  end
36
- alias length size
45
+ alias size length
37
46
 
38
47
  # return the length of the requested chromosome.
39
48
  def chrom_size(chrom)
@@ -49,10 +58,10 @@ module HTS
49
58
  alias chrom_length chrom_size
50
59
 
51
60
  # FIXME: naming and syntax
52
- def cget; end
61
+ # def cget; end
53
62
 
54
63
  # FIXME: naming and syntax
55
- def get; end
64
+ # def get; end
56
65
 
57
66
  # __iter__
58
67
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module FFI
  class Pointer
    # Only supplied when the loaded FFI does not already define the method.
    unless method_defined?(:read_array_of_struct)
      # Wrap +length+ consecutive +type+ instances starting at this pointer.
      #
      # @param type a class responding to .size and .new(pointer)
      # @param length [Integer] number of elements to wrap
      # @return [Array] one +type+ instance per element, in memory order
      def read_array_of_struct(type, length)
        stride = type.size
        cursor = self
        length.times.map do |index|
          element = type.new(cursor)
          # Do not advance past the final element (avoids an out-of-bounds pointer).
          cursor += stride unless index == length - 1
          element
        end
      end
    end
  end
end
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../htslib"
4
+
5
module HTS
  # A base class for hts files.
  #
  # Subclasses are expected to set @hts_file (the handle passed to the
  # LibHTS calls below) and to provide #each; #rewind additionally needs
  # @start_position to be set.
  class Hts
    class << self
      private

      # Define an instance method +name+ that collects +name+ from every
      # record produced by #each and returns the values as an Array.
      # The position reported by #tell before the scan is passed back to
      # #seek afterwards.
      #
      # @param name [Symbol] record attribute to collect
      def define_getter(name)
        define_method(name) do
          check_closed
          position = tell
          ary = map(&name)
          seek(position)
          ary
        end
      end

      # Define an instance method +each_<name>+ that yields +name+ of every
      # record produced by #each. Without a block the generated method
      # returns an Enumerator; with a block it returns self.
      #
      # @param name [Symbol] record attribute to yield
      def define_iterator(name)
        define_method("each_#{name}") do |&block|
          check_closed
          return to_enum(__method__) unless block

          each do |record|
            block.call(record.public_send(name))
          end
          self
        end
      end
    end

    # Accepts and ignores any arguments so subclasses can call +super+.
    def initialize(*args)
      # do nothing
    end

    # @return the underlying file handle stored in @hts_file
    def struct
      @hts_file
    end

    # @return the result of #to_ptr on the underlying file handle
    def to_ptr
      @hts_file.to_ptr
    end

    # @return [String] the +:format+ member of hts_get_format, as a String
    def file_format
      LibHTS.hts_get_format(@hts_file)[:format].to_s
    end

    # @return [String] "major" when the minor version is -1,
    #   otherwise "major.minor"
    def file_format_version
      v = LibHTS.hts_get_format(@hts_file)[:version]
      major = v[:major]
      minor = v[:minor]
      if minor == -1
        major.to_s
      else
        "#{major}.#{minor}"
      end
    end

    # Close the file handle. Does nothing when already closed.
    def close
      return if closed?

      LibHTS.hts_close(@hts_file)
      @hts_file = nil
    end

    # @return [Boolean] true when the handle is nil or reports null?
    def closed?
      @hts_file.nil? || @hts_file.null?
    end

    # Set the number of threads via hts_set_threads.
    # htslib is only called when +n+ is positive.
    #
    # @param n [Integer] number of threads
    # @return [self]
    # @raise [TypeError] when +n+ is not an Integer
    # @raise [RuntimeError] when hts_set_threads returns a negative value
    def set_threads(n)
      raise TypeError unless n.is_a?(Integer)

      if n > 0
        r = LibHTS.hts_set_threads(@hts_file, n)
        raise "Failed to set number of threads: #{n}" if r < 0
      end
      self
    end

    # Move to +offset+, measured from the start (IO::SEEK_SET), using the
    # seek function that matches the handle's is_cram / is_bgzf flags.
    #
    # @param offset position to seek to
    # @return the value returned by the selected LibHTS seek function
    def seek(offset)
      if @hts_file[:is_cram] == 1
        LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET)
      else
        LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
      end
    end

    # Report the current position.
    #
    # @return the value of bgzf_tell or htell, or nil for CRAM handles
    #   (no tell call is made for them)
    def tell
      if @hts_file[:is_cram] == 1
        # LibHTS.cram_tell(@hts_file[:fp][:cram])
        # warn 'cram_tell is not implemented in c htslib'
        nil
      elsif @hts_file[:is_bgzf] == 1
        LibHTS.bgzf_tell(@hts_file[:fp][:bgzf])
      else
        LibHTS.htell(@hts_file[:fp][:hfile])
      end
    end

    # Seek back to @start_position.
    #
    # @return the value of #tell after seeking
    # @raise [RuntimeError] when no start position was recorded or the
    #   seek returns a negative value
    def rewind
      if @start_position
        r = seek(@start_position)
        raise "Failed to rewind: #{r}" if r < 0

        tell
      else
        raise "Cannot rewind: no start position"
      end
    end

    private

    # @raise [IOError] when the file is closed
    def check_closed
      raise IOError, "closed stream" if closed?
    end
  end
end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
+ # Return a virtual file pointer to the current location in the file.
79
# Pack +block_address+ into the bits above the low 16 and the low 16 bits
# of +block_offset+ below it.
#
# @param fp an object indexable by :block_address and :block_offset
# @return [Integer] the combined value
def self.bgzf_tell(fp)
  block_part = fp[:block_address] << 16
  offset_part = fp[:block_offset] & 0xFFFF
  block_part | offset_part
end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFile, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFile, :string],
192
197
  :int
193
198
  end
194
199
  end