htslib 0.0.10 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  require_relative "hts"
@@ -12,10 +9,11 @@ require_relative "bcf/format"
12
9
  require_relative "bcf/record"
13
10
 
14
11
  module HTS
12
+ # A class for working with VCF, BCF files.
15
13
  class Bcf < Hts
16
14
  include Enumerable
17
15
 
18
- attr_reader :file_name, :index_path, :mode, :header
16
+ attr_reader :file_name, :index_name, :mode, :header
19
17
 
20
18
  def self.open(*args, **kw)
21
19
  file = new(*args, **kw) # do not yield
@@ -29,7 +27,7 @@ module HTS
29
27
  file
30
28
  end
31
29
 
32
- def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
33
31
  create_index: false)
34
32
  if block_given?
35
33
  message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
@@ -38,55 +36,91 @@ module HTS
38
36
 
39
37
  # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
40
38
 
41
- @file_name = file_name
42
- @mode = mode
43
- @hts_file = LibHTS.hts_open(@file_name, mode)
39
+ @file_name = file_name
40
+ @index_name = index
41
+ @mode = mode
42
+ @hts_file = LibHTS.hts_open(@file_name, mode)
44
43
 
45
44
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
46
45
 
47
- if threads&.> 0
48
- r = LibHTS.hts_set_threads(@hts_file, threads)
49
- raise "Failed to set number of threads: #{threads}" if r < 0
50
- end
46
+ set_threads(threads) if threads
51
47
 
52
48
  return if @mode[0] == "w"
53
49
 
54
50
  @header = Bcf::Header.new(@hts_file)
51
+ create_index(index) if create_index
52
+ @idx = load_index(index)
53
+ @start_position = tell
54
+ super # do nothing
55
+ end
56
+
57
+ def create_index(index_name = nil)
58
+ check_closed
59
+
60
+ warn "Create index for #{@file_name} to #{index_name}"
61
+ if index_name
62
+ LibHTS.bcf_index_build2(@file_name, index_name, -1)
63
+ else
64
+ LibHTS.bcf_index_build(@file_name, -1)
65
+ end
66
+ end
67
+
68
+ def load_index(index_name = nil)
69
+ check_closed
70
+
71
+ if index_name
72
+ LibHTS.bcf_index_load2(@file_name, index_name)
73
+ else
74
+ LibHTS.bcf_index_load3(@file_name, nil, 2)
75
+ end
76
+ end
77
+
78
+ def index_loaded?
79
+ check_closed
80
+
81
+ !@idx.null?
55
82
  end
56
83
 
57
84
  def write_header
85
+ check_closed
86
+
58
87
  @header = header.dup
59
88
  LibHTS.hts_set_fai_filename(header, @file_name)
60
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
89
+ LibHTS.bcf_hdr_write(@hts_file, header)
61
90
  end
62
91
 
63
92
  def write(var)
64
- var_dup = var.dup = var.dup
93
+ check_closed
94
+
95
+ var_dup = var.dup
65
96
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
66
97
  end
67
98
 
68
99
  # Close the current file.
69
- def close
70
- LibHTS.hts_close(@hts_file)
71
- @hts_file = nil
72
- end
73
-
74
- def closed?
75
- @hts_file.nil?
76
- end
77
100
 
78
101
  def nsamples
102
+ check_closed
103
+
79
104
  header.nsamples
80
105
  end
81
106
 
82
107
  def samples
108
+ check_closed
109
+
83
110
  header.samples
84
111
  end
85
112
 
86
- # Iterate over each record.
87
- # Generate a new Record object each time.
88
- # Slower than each.
89
- def each_copy
113
+ def each(copy: false, &block)
114
+ if copy
115
+ each_record_copy(&block)
116
+ else
117
+ each_record_reuse(&block)
118
+ end
119
+ end
120
+
121
+ private def each_record_copy
122
+ check_closed
123
+
90
124
  return to_enum(__method__) unless block_given?
91
125
 
92
126
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
@@ -96,10 +130,9 @@ module HTS
96
130
  self
97
131
  end
98
132
 
99
- # Iterate over each record.
100
- # Record object is reused.
101
- # Faster than each_copy.
102
- def each
133
+ private def each_record_reuse
134
+ check_closed
135
+
103
136
  return to_enum(__method__) unless block_given?
104
137
 
105
138
  bcf1 = LibHTS.bcf_init
@@ -107,5 +140,146 @@ module HTS
107
140
  yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
108
141
  self
109
142
  end
143
+
144
+ def query(...)
145
+ querys(...) # Fixme
146
+ end
147
+
148
+ # def queryi
149
+ # end
150
+
151
+ def querys(region, copy: false, &block)
152
+ if copy
153
+ querys_copy(region, &block)
154
+ else
155
+ querys_reuse(region, &block)
156
+ end
157
+ end
158
+
159
+ # private def queryi_copy
160
+ # end
161
+
162
+ # private def queryi_reuse
163
+ # end
164
+
165
+ private def querys_copy(region)
166
+ check_closed
167
+
168
+ raise "query is only available for BCF files" unless file_format == "bcf"
169
+ raise "Index file is required to call the query method." unless index_loaded?
170
+ return to_enum(__method__, region) unless block_given?
171
+
172
+ qitr = LibHTS.bcf_itr_querys(@idx, header, region)
173
+
174
+ begin
175
+ loop do
176
+ bcf1 = LibHTS.bcf_init
177
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
178
+ break if slen == -1
179
+ raise if slen < -1
180
+
181
+ yield Record.new(bcf1, header)
182
+ end
183
+ ensure
184
+ LibHTS.bcf_itr_destroy(qitr)
185
+ end
186
+ self
187
+ end
188
+
189
+ private def querys_reuse(region)
190
+ check_closed
191
+
192
+ raise "query is only available for BCF files" unless file_format == "bcf"
193
+ raise "Index file is required to call the query method." unless index_loaded?
194
+ return to_enum(__method__, region) unless block_given?
195
+
196
+ qitr = LibHTS.bcf_itr_querys(@idx, header, region)
197
+
198
+ bcf1 = LibHTS.bcf_init
199
+ record = Record.new(bcf1, header)
200
+ begin
201
+ loop do
202
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
203
+ break if slen == -1
204
+ raise if slen < -1
205
+
206
+ yield record
207
+ end
208
+ ensure
209
+ LibHTS.bcf_itr_destroy(qitr)
210
+ end
211
+ self
212
+ end
213
+
214
+ # @!macro [attach] define_getter
215
+ # @method $1
216
+ # Get $1 array
217
+ # @return [Array] the $1 array
218
+ define_getter :chrom
219
+ define_getter :pos
220
+ define_getter :endpos
221
+ define_getter :id
222
+ define_getter :ref
223
+ define_getter :alt
224
+ define_getter :qual
225
+ define_getter :filter
226
+
227
+ def info(key = nil)
228
+ check_closed
229
+ position = tell
230
+ if key
231
+ ary = map { |r| r.info(key) }
232
+ else
233
+ raise NotImplementedError
234
+ # ary = each_copy.map { |r| r.info }
235
+ # ary = map { |r| r.info.clone }
236
+ end
237
+ seek(position)
238
+ ary
239
+ end
240
+
241
+ def format(key = nil)
242
+ check_closed
243
+ position = tell
244
+ if key
245
+ ary = map { |r| r.format(key) }
246
+ else
247
+ raise NotImplementedError
248
+ # ary = each_copy.map { |r| r.format }
249
+ # ary = map { |r| r.format.clone }
250
+ end
251
+ seek(position)
252
+ ary
253
+ end
254
+
255
+ # @!macro [attach] define_iterator
256
+ # @method each_$1
257
+ # Get $1 iterator
258
+ define_iterator :chrom
259
+ define_iterator :pos
260
+ define_iterator :endpos
261
+ define_iterator :id
262
+ define_iterator :ref
263
+ define_iterator :alt
264
+ define_iterator :qual
265
+ define_iterator :filter
266
+
267
+ def each_info(key)
268
+ check_closed
269
+ return to_enum(__method__, key) unless block
270
+
271
+ each do |r|
272
+ yield r.info(key)
273
+ end
274
+ end
275
+
276
+ def each_format(key)
277
+ check_closed
278
+ return to_enum(__method__, key) unless block
279
+
280
+ each do |r|
281
+ yield r.format(key)
282
+ end
283
+ end
110
284
  end
111
285
  end
data/lib/hts/faidx.rb CHANGED
@@ -1,48 +1,57 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  module HTS
9
6
  class Faidx
10
7
  attr_reader :file_name
11
8
 
12
- class << self
13
- alias open new
9
+ def self.open(*args, **kw)
10
+ file = new(*args, **kw) # do not yield
11
+ return file unless block_given?
12
+
13
+ begin
14
+ yield file
15
+ ensure
16
+ file.close
17
+ end
18
+ file
14
19
  end
15
20
 
16
21
  def initialize(file_name)
22
+ if block_given?
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
+ end
26
+
17
27
  @file_name = file_name
18
28
  @fai = LibHTS.fai_load(@file_name)
19
29
 
20
- # IO like API
21
- if block_given?
22
- begin
23
- yield self
24
- ensure
25
- close
26
- end
27
- end
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
28
35
  end
29
36
 
30
37
  def close
31
38
  LibHTS.fai_destroy(@fai)
32
39
  end
33
40
 
41
+ # FIXME: This doesn't seem to work as expected
42
+ # def closed?
43
+ # @fai.null?
44
+ # end
45
+
34
46
  # the number of sequences in the index.
35
- def size
47
+ def length
36
48
  LibHTS.faidx_nseq(@fai)
37
49
  end
38
- alias length size
50
+ alias size length
39
51
 
40
52
  # return the length of the requested chromosome.
41
53
  def chrom_size(chrom)
42
- unless chrom.is_a?(String) || chrom.is_a?(Symbol)
43
- # FIXME
44
- raise ArgumentError, "Expect chrom to be String or Symbol"
45
- end
54
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
46
55
 
47
56
  chrom = chrom.to_s
48
57
  result = LibHTS.faidx_seq_len(@fai, chrom)
@@ -50,12 +59,41 @@ module HTS
50
59
  end
51
60
  alias chrom_length chrom_size
52
61
 
53
- # FIXME: naming and syntax
54
- def cget; end
62
+ # return the length of the requested chromosome.
63
+ def chrom_names
64
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
+ end
66
+
67
+ # @overload fetch(name)
68
+ # Fetch the sequence as a String.
69
+ # @param name [String] chr1:0-10
70
+ # @overload fetch(name, start, stop)
71
+ # Fetch the sequence as a String.
72
+ # @param name [String] the name of the chromosome
73
+ # @param start [Integer] the start position of the sequence (0-based)
74
+ # @param stop [Integer] the end position of the sequence (0-based)
75
+ # @return [String] the sequence
76
+
77
+ def seq(name, start = nil, stop = nil)
78
+ name = name.to_s
79
+ rlen = FFI::MemoryPointer.new(:int)
55
80
 
56
- # FIXME: naming and syntax
57
- def get; end
81
+ if start.nil? && stop.nil?
82
+ result = LibHTS.fai_fetch(@fai, name, rlen)
83
+ else
84
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
58
87
 
59
- # __iter__
88
+ result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
+ end
90
+
91
+ case rlen.read_int
92
+ when -2 then raise "Invalid chromosome name: #{name}"
93
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
94
+ end
95
+
96
+ result
97
+ end
60
98
  end
61
99
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FFI
4
+ class Pointer
5
+ unless method_defined?(:read_array_of_struct)
6
+ def read_array_of_struct(type, length)
7
+ ary = []
8
+ size = type.size
9
+ tmp = self
10
+ length.times do |j|
11
+ ary << type.new(tmp)
12
+ tmp += size unless j == length - 1 # avoid OOB
13
+ end
14
+ ary
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/hts/hts.rb CHANGED
@@ -1,7 +1,40 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "../htslib"
2
4
 
3
5
  module HTS
6
+ # A base class for hts files.
4
7
  class Hts
8
+ class << self
9
+ private
10
+
11
+ def define_getter(name)
12
+ define_method(name) do
13
+ check_closed
14
+ position = tell
15
+ ary = map(&name)
16
+ seek(position)
17
+ ary
18
+ end
19
+ end
20
+
21
+ def define_iterator(name)
22
+ define_method("each_#{name}") do |&block|
23
+ check_closed
24
+ return to_enum(__method__) unless block
25
+
26
+ each do |record|
27
+ block.call(record.public_send(name))
28
+ end
29
+ self
30
+ end
31
+ end
32
+ end
33
+
34
+ def initialize(*args)
35
+ # do nothing
36
+ end
37
+
5
38
  def struct
6
39
  @hts_file
7
40
  end
@@ -10,11 +43,11 @@ module HTS
10
43
  @hts_file.to_ptr
11
44
  end
12
45
 
13
- def format
46
+ def file_format
14
47
  LibHTS.hts_get_format(@hts_file)[:format].to_s
15
48
  end
16
49
 
17
- def format_version
50
+ def file_format_version
18
51
  v = LibHTS.hts_get_format(@hts_file)[:version]
19
52
  major = v[:major]
20
53
  minor = v[:minor]
@@ -24,7 +57,28 @@ module HTS
24
57
  "#{major}.#{minor}"
25
58
  end
26
59
  end
27
-
60
+
61
+ def close
62
+ return if closed?
63
+
64
+ LibHTS.hts_close(@hts_file)
65
+ @hts_file = nil
66
+ end
67
+
68
+ def closed?
69
+ @hts_file.nil? || @hts_file.null?
70
+ end
71
+
72
+ def set_threads(n)
73
+ raise TypeError unless n.is_a(Integer)
74
+
75
+ if n > 0
76
+ r = LibHTS.hts_set_threads(@hts_file, n)
77
+ raise "Failed to set number of threads: #{threads}" if r < 0
78
+ end
79
+ self
80
+ end
81
+
28
82
  def seek(offset)
29
83
  if @hts_file[:is_cram] == 1
30
84
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -48,9 +102,20 @@ module HTS
48
102
  end
49
103
 
50
104
  def rewind
51
- r = seek(@start_position) if @start_position
52
- raise "Failed to rewind: #{r}" if r < 0
53
- r
105
+ if @start_position
106
+ r = seek(@start_position)
107
+ raise "Failed to rewind: #{r}" if r < 0
108
+
109
+ tell
110
+ else
111
+ raise "Cannot rewind: no start position"
112
+ end
113
+ end
114
+
115
+ private
116
+
117
+ def check_closed
118
+ raise IOError, "closed stream" if closed?
54
119
  end
55
120
  end
56
121
  end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- [HFILE, :string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- [HFILE, :pointer, :size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -181,7 +181,7 @@ module HTS
181
181
  # Load BGZF index from an hFILE
182
182
  attach_function \
183
183
  :bgzf_index_load_hfile,
184
- [BGZF, HFILE, :string],
184
+ [BGZF, HFile, :string],
185
185
  :int
186
186
 
187
187
  # Save BGZF index
@@ -193,7 +193,7 @@ module HTS
193
193
  # Write a BGZF index to an hFILE
194
194
  attach_function \
195
195
  :bgzf_index_dump_hfile,
196
- [BGZF, HFILE, :string],
196
+ [BGZF, HFile, :string],
197
197
  :int
198
198
  end
199
199
  end