htslib 0.2.5 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,6 +94,14 @@ module HTS
94
94
  kstr[:s]
95
95
  end
96
96
 
97
+ def name2id(name)
98
+ LibHTS.bcf_hdr_name2id(@bcf_hdr, name)
99
+ end
100
+
101
+ def id2name(id)
102
+ LibHTS.bcf_hdr_id2name(@bcf_hdr, id)
103
+ end
104
+
97
105
  private
98
106
 
99
107
  def bcf_hl_type_to_int(bcf_hl_type)
data/lib/hts/bcf/info.rb CHANGED
@@ -9,31 +9,11 @@ module HTS
9
9
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
10
  end
11
11
 
12
- # For compatibility with HTS.cr.
13
- def get_int(key)
14
- get(key, :int)
15
- end
16
-
17
- # For compatibility with HTS.cr.
18
- def get_float(key)
19
- get(key, :float)
20
- end
21
-
22
- # For compatibility with HTS.cr.
23
- def get_string(key)
24
- get(key, :string)
25
- end
26
-
27
- # For compatibility with HTS.cr.
28
- def get_flag(key)
29
- get(key, :flag)
30
- end
31
-
32
- def [](key)
33
- get(key)
34
- end
35
-
36
12
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
13
+ # @note Why is this method named "get" instead of "fetch"?
14
+ # This is for compatibility with the Crystal language
15
+ # which provides methods like `get_int`, `get_float`, etc.
16
+ # I think they are better than `fetch_int` and `fetch_float`.
37
17
  def get(key, type = nil)
38
18
  n = FFI::MemoryPointer.new(:int)
39
19
  p1 = @p1
@@ -70,6 +50,30 @@ module HTS
70
50
  end
71
51
  end
72
52
 
53
+ # For compatibility with HTS.cr.
54
+ def get_int(key)
55
+ get(key, :int)
56
+ end
57
+
58
+ # For compatibility with HTS.cr.
59
+ def get_float(key)
60
+ get(key, :float)
61
+ end
62
+
63
+ # For compatibility with HTS.cr.
64
+ def get_string(key)
65
+ get(key, :string)
66
+ end
67
+
68
+ # For compatibility with HTS.cr.
69
+ def get_flag(key)
70
+ get(key, :flag)
71
+ end
72
+
73
+ def [](key)
74
+ get(key)
75
+ end
76
+
73
77
  # FIXME: naming? room for improvement.
74
78
  def fields
75
79
  keys.map do |key|
@@ -4,8 +4,8 @@ module HTS
4
4
  class Bcf < Hts
5
5
  # A class for working with VCF records.
6
6
  class Record
7
- def initialize(bcf_t, header)
8
- @bcf1 = bcf_t
7
+ def initialize(header, bcf_t = nil)
8
+ @bcf1 = bcf_t || LibHTS.bcf_init
9
9
  @header = header
10
10
  end
11
11
 
data/lib/hts/bcf.rb CHANGED
@@ -54,7 +54,7 @@ module HTS
54
54
  @start_position = tell
55
55
  end
56
56
 
57
- def build_index(index_name = nil, min_shift: 14)
57
+ def build_index(index_name = nil, min_shift: 14, threads: 2)
58
58
  check_closed
59
59
 
60
60
  if index_name
@@ -62,10 +62,15 @@ module HTS
62
62
  else
63
63
  warn "Create index for #{@file_name}"
64
64
  end
65
- r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
66
- raise "Failed to build index for #{@file_name}" if r < 0
67
-
68
- self
65
+ case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
66
+ when 0 # successful
67
+ when -1 then raise "indexing failed"
68
+ when -2 then raise "opening #{@file_name} failed"
69
+ when -3 then raise "format not indexable"
70
+ when -4 then raise "failed to create and/or save the index"
71
+ else raise "unknown error"
72
+ end
73
+ self # for method chaining
69
74
  end
70
75
 
71
76
  def load_index(index_name = nil)
@@ -93,19 +98,25 @@ module HTS
93
98
  def write_header(header)
94
99
  check_closed
95
100
 
96
- @header = header.dup
101
+ @header = header.clone
97
102
  LibHTS.bcf_hdr_write(@hts_file, header)
98
103
  end
99
104
 
100
- def write(var)
105
+ def header=(header)
106
+ write_header(header)
107
+ end
108
+
109
+ def write(record)
101
110
  check_closed
102
111
 
103
- var_dup = var.dup
104
- r = LibHTS.bcf_write(@hts_file, header, var_dup)
112
+ # record = record.dup
113
+ r = LibHTS.bcf_write(@hts_file, header, record)
105
114
  raise "Failed to write record" if r < 0
106
115
  end
107
116
 
108
- # Close the current file.
117
+ def <<(var)
118
+ write(var)
119
+ end
109
120
 
110
121
  def nsamples
111
122
  check_closed
@@ -119,35 +130,6 @@ module HTS
119
130
  header.samples
120
131
  end
121
132
 
122
- def each(copy: false, &block)
123
- if copy
124
- each_record_copy(&block)
125
- else
126
- each_record_reuse(&block)
127
- end
128
- end
129
-
130
- def query(...)
131
- querys(...) # Fixme
132
- end
133
-
134
- # def queryi
135
- # end
136
-
137
- def querys(region, copy: false, &block)
138
- if copy
139
- querys_copy(region, &block)
140
- else
141
- querys_reuse(region, &block)
142
- end
143
- end
144
-
145
- # private def queryi_copy
146
- # end
147
-
148
- # private def queryi_reuse
149
- # end
150
-
151
133
  # @!macro [attach] define_getter
152
134
  # @method $1
153
135
  # Get $1 array
@@ -164,13 +146,13 @@ module HTS
164
146
  def info(key = nil)
165
147
  check_closed
166
148
  position = tell
167
- if key
168
- ary = map { |r| r.info(key) }
169
- else
170
- raise NotImplementedError
171
- # ary = each_copy.map { |r| r.info }
172
- # ary = map { |r| r.info.clone }
173
- end
149
+ raise NotImplementedError unless key
150
+
151
+ ary = map { |r| r.info(key) }
152
+
153
+ # ary = each_copy.map { |r| r.info }
154
+ # ary = map { |r| r.info.clone }
155
+
174
156
  seek(position)
175
157
  ary
176
158
  end
@@ -178,13 +160,13 @@ module HTS
178
160
  def format(key = nil)
179
161
  check_closed
180
162
  position = tell
181
- if key
182
- ary = map { |r| r.format(key) }
183
- else
184
- raise NotImplementedError
185
- # ary = each_copy.map { |r| r.format }
186
- # ary = map { |r| r.format.clone }
187
- end
163
+ raise NotImplementedError unless key
164
+
165
+ ary = map { |r| r.format(key) }
166
+
167
+ # ary = each_copy.map { |r| r.format }
168
+ # ary = map { |r| r.format.clone }
169
+
188
170
  seek(position)
189
171
  ary
190
172
  end
@@ -219,20 +201,71 @@ module HTS
219
201
  end
220
202
  end
221
203
 
222
- private
204
+ def each(copy: false, &block)
205
+ if copy
206
+ each_record_copy(&block)
207
+ else
208
+ each_record_reuse(&block)
209
+ end
210
+ end
223
211
 
224
- def querys_reuse(region)
212
+ def query(region, beg = nil, end_ = nil, copy: false, &block)
225
213
  check_closed
226
214
 
227
215
  raise "query is only available for BCF files" unless file_format == "bcf"
228
216
  raise "Index file is required to call the query method." unless index_loaded?
217
+
218
+ if beg && end_
219
+ tid = header.name2id(region)
220
+ queryi(tid, beg, end_, copy:, &block)
221
+ elsif beg.nil? && end_.nil?
222
+ querys(region, copy:, &block)
223
+ else
224
+ raise ArgumentError, "beg and end must be specified together"
225
+ end
226
+ end
227
+
228
+ private
229
+
230
+ def queryi(tid, beg, end_, copy: false, &block)
231
+ if copy
232
+ queryi_copy(tid, beg, end_, &block)
233
+ else
234
+ queryi_reuse(tid, beg, end_, &block)
235
+ end
236
+ end
237
+
238
+ def querys(region, copy: false, &block)
239
+ if copy
240
+ querys_copy(region, &block)
241
+ else
242
+ querys_reuse(region, &block)
243
+ end
244
+ end
245
+
246
+ def queryi_reuse(tid, beg, end_, &block)
247
+ return to_enum(__method__, tid, beg, end_) unless block_given?
248
+
249
+ qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
250
+ raise "Failed to query region #{tid} #{beg} #{end_}" if qiter.null?
251
+
252
+ query_reuse_yield(qiter, &block)
253
+ self
254
+ end
255
+
256
+ def querys_reuse(region, &block)
229
257
  return to_enum(__method__, region) unless block_given?
230
258
 
231
259
  qiter = LibHTS.bcf_itr_querys(@idx, header, region)
232
260
  raise "Failed to query region #{region}" if qiter.null?
233
261
 
262
+ query_reuse_yield(qiter, &block)
263
+ self
264
+ end
265
+
266
+ def query_reuse_yield(qiter)
234
267
  bcf1 = LibHTS.bcf_init
235
- record = Record.new(bcf1, header)
268
+ record = Record.new(header, bcf1)
236
269
  begin
237
270
  loop do
238
271
  slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
@@ -244,32 +277,39 @@ module HTS
244
277
  ensure
245
278
  LibHTS.bcf_itr_destroy(qiter)
246
279
  end
247
- self
248
280
  end
249
281
 
250
- def querys_copy(region)
251
- check_closed
282
+ def queryi_copy(tid, beg, end_, &block)
283
+ return to_enum(__method__, tid, beg, end_) unless block_given?
252
284
 
253
- raise "query is only available for BCF files" unless file_format == "bcf"
254
- raise "Index file is required to call the query method." unless index_loaded?
285
+ qiter = LibHTS.bcf_itr_queryi(@idx, tid, beg, end_)
286
+ raise "Failed to query region #{tid} #{beg} #{end_}" if qiter.null?
287
+
288
+ query_copy_yield(qiter, &block)
289
+ self
290
+ end
291
+
292
+ def querys_copy(region, &block)
255
293
  return to_enum(__method__, region) unless block_given?
256
294
 
257
295
  qiter = LibHTS.bcf_itr_querys(@idx, header, region)
258
296
  raise "Failed to query region #{region}" if qiter.null?
259
297
 
260
- begin
261
- loop do
262
- bcf1 = LibHTS.bcf_init
263
- slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
264
- break if slen == -1
265
- raise if slen < -1
298
+ query_copy_yield(qiter, &block)
299
+ self
300
+ end
266
301
 
267
- yield Record.new(bcf1, header)
268
- end
269
- ensure
270
- LibHTS.bcf_itr_destroy(qiter)
302
+ def query_copy_yield(qiter)
303
+ loop do
304
+ bcf1 = LibHTS.bcf_init
305
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
306
+ break if slen == -1
307
+ raise if slen < -1
308
+
309
+ yield Record.new(header, bcf1)
271
310
  end
272
- self
311
+ ensure
312
+ LibHTS.bcf_itr_destroy(qiter)
273
313
  end
274
314
 
275
315
  def each_record_reuse
@@ -278,7 +318,7 @@ module HTS
278
318
  return to_enum(__method__) unless block_given?
279
319
 
280
320
  bcf1 = LibHTS.bcf_init
281
- record = Record.new(bcf1, header)
321
+ record = Record.new(header, bcf1)
282
322
  yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
283
323
  self
284
324
  end
@@ -289,7 +329,7 @@ module HTS
289
329
  return to_enum(__method__) unless block_given?
290
330
 
291
331
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
292
- record = Record.new(bcf1, header)
332
+ record = Record.new(header, bcf1)
293
333
  yield record
294
334
  end
295
335
  self
@@ -0,0 +1,62 @@
1
+ module HTS
2
+ class Faidx
3
+ class Sequence
4
+ attr_reader :name, :faidx
5
+
6
+ def initialize(faidx, name)
7
+ raise unless faidx.has_key?(name)
8
+
9
+ @faidx = faidx
10
+ @name = name
11
+ end
12
+
13
+ def length
14
+ faidx.seq_len(name)
15
+ end
16
+ alias size length
17
+
18
+ def seq(start = nil, stop = nil)
19
+ faidx.seq(name, start, stop)
20
+ end
21
+
22
+ def qual(start = nil, stop = nil)
23
+ faidx.qual(name, start, stop)
24
+ end
25
+
26
+ def [](arg)
27
+ case arg
28
+ when Integer
29
+ if arg >= 0
30
+ start = arg
31
+ stop = arg
32
+ else
33
+ start = length + arg
34
+ stop = length + arg
35
+ end
36
+ when Range
37
+ arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
38
+ arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
39
+ if arg.begin.nil?
40
+ if arg.end.nil?
41
+ start = nil
42
+ stop = nil
43
+ else
44
+ start = 0
45
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
46
+ end
47
+ elsif arg.end.nil?
48
+ # always include the first base
49
+ start = arg.begin
50
+ stop = length - 1
51
+ else
52
+ start = arg.begin
53
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
54
+ end
55
+ else
56
+ raise ArgumentError
57
+ end
58
+ seq(start, stop)
59
+ end
60
+ end
61
+ end
62
+ end
data/lib/hts/faidx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../htslib"
4
+ require_relative "faidx/sequence"
4
5
 
5
6
  module HTS
6
7
  class Faidx
@@ -25,7 +26,12 @@ module HTS
25
26
  end
26
27
 
27
28
  @file_name = file_name
28
- @fai = LibHTS.fai_load(@file_name)
29
+ @fai = case File.extname(@file_name)
30
+ when ".fq", ".fastq"
31
+ LibHTS.fai_load_format(@file_name, 2)
32
+ else
33
+ LibHTS.fai_load(@file_name)
34
+ end
29
35
 
30
36
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
37
  end
@@ -35,13 +41,19 @@ module HTS
35
41
  end
36
42
 
37
43
  def close
44
+ return if closed?
45
+
38
46
  LibHTS.fai_destroy(@fai)
47
+ @fai = nil
39
48
  end
40
49
 
41
- # FIXME: This doesn't seem to work as expected
42
- # def closed?
43
- # @fai.null?
44
- # end
50
+ def closed?
51
+ @fai.nil? || @fai.null?
52
+ end
53
+
54
+ def file_format
55
+ @fai[:format]
56
+ end
45
57
 
46
58
  # the number of sequences in the index.
47
59
  def length
@@ -50,42 +62,85 @@ module HTS
50
62
  alias size length
51
63
 
52
64
  # return the names of the sequences in the index.
53
- def chrom_size(chrom)
65
+ def names
66
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
67
+ end
68
+
69
+ alias keys names
70
+
71
+ def has_key?(key)
72
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
73
+
74
+ key = key.to_s
75
+ case LibHTS.faidx_has_seq(@fai, key)
76
+ when 1 then true
77
+ when 0 then false
78
+ else raise
79
+ end
80
+ end
81
+
82
+ def [](name)
83
+ name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
84
+ Sequence.new(self, name)
85
+ end
86
+
87
+ # return the length of the requested chromosome.
88
+ def seq_len(chrom)
54
89
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
55
90
 
56
91
  chrom = chrom.to_s
57
92
  result = LibHTS.faidx_seq_len(@fai, chrom)
58
93
  result == -1 ? nil : result
59
94
  end
60
- alias chrom_length chrom_size
61
95
 
62
- # return the length of the requested chromosome.
63
- def chrom_names
64
- Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
- end
66
-
67
- # @overload fetch(name)
96
+ # @overload seq(name)
68
97
  # Fetch the sequence as a String.
69
98
  # @param name [String] chr1:0-10
70
- # @overload fetch(name, start, stop)
99
+ # @overload seq(name, start, stop)
71
100
  # Fetch the sequence as a String.
72
101
  # @param name [String] the name of the chromosome
73
102
  # @param start [Integer] the start position of the sequence (0-based)
74
103
  # @param stop [Integer] the end position of the sequence (0-based)
75
104
  # @return [String] the sequence
76
105
 
77
- def seq(name, start = nil, stop = nil)
106
+ def fetch_seq(name, start = nil, stop = nil)
107
+ name = name.to_s
108
+ rlen = FFI::MemoryPointer.new(:int)
109
+
110
+ if start.nil? && stop.nil?
111
+ result = LibHTS.fai_fetch64(@fai, name, rlen)
112
+ else
113
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
114
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
115
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
116
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
117
+
118
+ result = LibHTS.faidx_fetch_seq64(@fai, name, start, stop, rlen)
119
+ end
120
+
121
+ case rlen.read_int
122
+ when -2 then raise "Invalid chromosome name: #{name}"
123
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
124
+ end
125
+
126
+ result
127
+ end
128
+
129
+ alias seq fetch_seq
130
+
131
+ def fetch_qual(name, start = nil, stop = nil)
78
132
  name = name.to_s
79
133
  rlen = FFI::MemoryPointer.new(:int)
80
134
 
81
135
  if start.nil? && stop.nil?
82
- result = LibHTS.fai_fetch(@fai, name, rlen)
136
+ result = LibHTS.fai_fetchqual64(@fai, name, rlen)
83
137
  else
84
138
  start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
139
  stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
140
  start > stop && raise(ArgumentError, "Expect start to be <= stop")
141
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
87
142
 
88
- result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
143
+ result = LibHTS.faidx_fetch_qual64(@fai, name, start, stop, rlen)
89
144
  end
90
145
 
91
146
  case rlen.read_int
@@ -95,5 +150,7 @@ module HTS
95
150
 
96
151
  result
97
152
  end
153
+
154
+ alias qual fetch_qual
98
155
  end
99
156
  end
data/lib/hts/hts.rb CHANGED
@@ -69,6 +69,11 @@ module HTS
69
69
  @hts_file.nil? || @hts_file.null?
70
70
  end
71
71
 
72
+ def fai=(fai)
73
+ check_closed
74
+ LibHTS.hts_set_fai_filename(@hts_file, fai) > 0 || raise
75
+ end
76
+
72
77
  def set_threads(n = nil)
73
78
  if n.nil?
74
79
  require "etc"
@@ -107,14 +112,12 @@ module HTS
107
112
  end
108
113
 
109
114
  def rewind
110
- if @start_position
111
- r = seek(@start_position)
112
- raise "Failed to rewind: #{r}" if r < 0
115
+ raise "Cannot rewind: no start position" unless @start_position
113
116
 
114
- tell
115
- else
116
- raise "Cannot rewind: no start position"
117
- end
117
+ r = seek(@start_position)
118
+ raise "Failed to rewind: #{r}" if r < 0
119
+
120
+ tell
118
121
  end
119
122
 
120
123
  private
@@ -41,7 +41,7 @@ module HTS
41
41
  :ssize_t
42
42
 
43
43
  # Write _length_ bytes from _data_ to the file, the index will be used to
44
- # decide the amount of uncompressed data to be writen to each bgzip block.
44
+ # decide the amount of uncompressed data to be written to each bgzip block.
45
45
  attach_function \
46
46
  :bgzf_block_write,
47
47
  [BGZF, :pointer, :size_t],