htslib 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf.rb CHANGED
@@ -9,10 +9,11 @@ require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
+ # A class for working with VCF, BCF files.
12
13
  class Bcf < Hts
13
14
  include Enumerable
14
15
 
15
- attr_reader :file_name, :index_name, :mode, :header
16
+ attr_reader :file_name, :index_name, :mode, :header, :nthreads
16
17
 
17
18
  def self.open(*args, **kw)
18
19
  file = new(*args, **kw) # do not yield
@@ -26,8 +27,8 @@ module HTS
26
27
  file
27
28
  end
28
29
 
29
- def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
- create_index: false)
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
31
+ build_index: false)
31
32
  if block_given?
32
33
  message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
33
34
  raise message
@@ -38,36 +39,39 @@ module HTS
38
39
  @file_name = file_name
39
40
  @index_name = index
40
41
  @mode = mode
42
+ @nthreads = threads
41
43
  @hts_file = LibHTS.hts_open(@file_name, mode)
42
44
 
43
45
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
44
46
 
45
- if threads&.> 0
46
- r = LibHTS.hts_set_threads(@hts_file, threads)
47
- raise "Failed to set number of threads: #{threads}" if r < 0
48
- end
47
+ set_threads(threads) if threads
49
48
 
50
49
  return if @mode[0] == "w"
51
50
 
52
51
  @header = Bcf::Header.new(@hts_file)
53
-
54
- create_index(index) if create_index
55
-
52
+ build_index(index) if build_index
56
53
  @idx = load_index(index)
57
-
58
54
  @start_position = tell
55
+ super # do nothing
59
56
  end
60
57
 
61
- def create_index(index_name = nil)
62
- warn "Create index for #{@file_name} to #{index_name}"
63
- if index
64
- LibHTS.bcf_index_build2(@hts_file, index_name, -1)
58
+ def build_index(index_name = nil, min_shift: 14)
59
+ check_closed
60
+
61
+ if index_name
62
+ warn "Create index for #{@file_name} to #{index_name}"
65
63
  else
66
- LibHTS.bcf_index_build(@hts_file, -1)
64
+ warn "Create index for #{@file_name}"
67
65
  end
66
+ r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
67
+ raise "Failed to build index for #{@file_name}" if r < 0
68
+
69
+ self
68
70
  end
69
71
 
70
72
  def load_index(index_name = nil)
73
+ check_closed
74
+
71
75
  if index_name
72
76
  LibHTS.bcf_index_load2(@file_name, index_name)
73
77
  else
@@ -76,39 +80,50 @@ module HTS
76
80
  end
77
81
 
78
82
  def index_loaded?
83
+ check_closed
84
+
79
85
  !@idx.null?
80
86
  end
81
87
 
82
88
  def write_header
83
- raise IOError, "closed stream" if closed?
89
+ check_closed
84
90
 
85
91
  @header = header.dup
86
92
  LibHTS.hts_set_fai_filename(header, @file_name)
87
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
93
+ LibHTS.bcf_hdr_write(@hts_file, header)
88
94
  end
89
95
 
90
96
  def write(var)
91
- raise IOError, "closed stream" if closed?
97
+ check_closed
92
98
 
93
- var_dup = var.dup = var.dup
99
+ var_dup = var.dup
94
100
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
95
101
  end
96
102
 
97
103
  # Close the current file.
98
104
 
99
105
  def nsamples
106
+ check_closed
107
+
100
108
  header.nsamples
101
109
  end
102
110
 
103
111
  def samples
112
+ check_closed
113
+
104
114
  header.samples
105
115
  end
106
116
 
107
- # Iterate over each record.
108
- # Generate a new Record object each time.
109
- # Slower than each.
110
- def each_copy
111
- raise IOError, "closed stream" if closed?
117
+ def each(copy: false, &block)
118
+ if copy
119
+ each_record_copy(&block)
120
+ else
121
+ each_record_reuse(&block)
122
+ end
123
+ end
124
+
125
+ private def each_record_copy
126
+ check_closed
112
127
 
113
128
  return to_enum(__method__) unless block_given?
114
129
 
@@ -119,11 +134,8 @@ module HTS
119
134
  self
120
135
  end
121
136
 
122
- # Iterate over each record.
123
- # Record object is reused.
124
- # Faster than each_copy.
125
- def each
126
- raise IOError, "closed stream" if closed?
137
+ private def each_record_reuse
138
+ check_closed
127
139
 
128
140
  return to_enum(__method__) unless block_given?
129
141
 
@@ -132,5 +144,146 @@ module HTS
132
144
  yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
133
145
  self
134
146
  end
147
+
148
+ def query(...)
149
+ querys(...) # Fixme
150
+ end
151
+
152
+ # def queryi
153
+ # end
154
+
155
+ def querys(region, copy: false, &block)
156
+ if copy
157
+ querys_copy(region, &block)
158
+ else
159
+ querys_reuse(region, &block)
160
+ end
161
+ end
162
+
163
+ # private def queryi_copy
164
+ # end
165
+
166
+ # private def queryi_reuse
167
+ # end
168
+
169
+ private def querys_copy(region)
170
+ check_closed
171
+
172
+ raise "query is only available for BCF files" unless file_format == "bcf"
173
+ raise "Index file is required to call the query method." unless index_loaded?
174
+ return to_enum(__method__, region) unless block_given?
175
+
176
+ qitr = LibHTS.bcf_itr_querys(@idx, header, region)
177
+
178
+ begin
179
+ loop do
180
+ bcf1 = LibHTS.bcf_init
181
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
182
+ break if slen == -1
183
+ raise if slen < -1
184
+
185
+ yield Record.new(bcf1, header)
186
+ end
187
+ ensure
188
+ LibHTS.bcf_itr_destroy(qitr)
189
+ end
190
+ self
191
+ end
192
+
193
+ private def querys_reuse(region)
194
+ check_closed
195
+
196
+ raise "query is only available for BCF files" unless file_format == "bcf"
197
+ raise "Index file is required to call the query method." unless index_loaded?
198
+ return to_enum(__method__, region) unless block_given?
199
+
200
+ qitr = LibHTS.bcf_itr_querys(@idx, header, region)
201
+
202
+ bcf1 = LibHTS.bcf_init
203
+ record = Record.new(bcf1, header)
204
+ begin
205
+ loop do
206
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
207
+ break if slen == -1
208
+ raise if slen < -1
209
+
210
+ yield record
211
+ end
212
+ ensure
213
+ LibHTS.bcf_itr_destroy(qitr)
214
+ end
215
+ self
216
+ end
217
+
218
+ # @!macro [attach] define_getter
219
+ # @method $1
220
+ # Get $1 array
221
+ # @return [Array] the $1 array
222
+ define_getter :chrom
223
+ define_getter :pos
224
+ define_getter :endpos
225
+ define_getter :id
226
+ define_getter :ref
227
+ define_getter :alt
228
+ define_getter :qual
229
+ define_getter :filter
230
+
231
+ def info(key = nil)
232
+ check_closed
233
+ position = tell
234
+ if key
235
+ ary = map { |r| r.info(key) }
236
+ else
237
+ raise NotImplementedError
238
+ # ary = each_copy.map { |r| r.info }
239
+ # ary = map { |r| r.info.clone }
240
+ end
241
+ seek(position)
242
+ ary
243
+ end
244
+
245
+ def format(key = nil)
246
+ check_closed
247
+ position = tell
248
+ if key
249
+ ary = map { |r| r.format(key) }
250
+ else
251
+ raise NotImplementedError
252
+ # ary = each_copy.map { |r| r.format }
253
+ # ary = map { |r| r.format.clone }
254
+ end
255
+ seek(position)
256
+ ary
257
+ end
258
+
259
+ # @!macro [attach] define_iterator
260
+ # @method each_$1
261
+ # Get $1 iterator
262
+ define_iterator :chrom
263
+ define_iterator :pos
264
+ define_iterator :endpos
265
+ define_iterator :id
266
+ define_iterator :ref
267
+ define_iterator :alt
268
+ define_iterator :qual
269
+ define_iterator :filter
270
+
271
+ def each_info(key)
272
+ check_closed
273
+ return to_enum(__method__, key) unless block
274
+
275
+ each do |r|
276
+ yield r.info(key)
277
+ end
278
+ end
279
+
280
+ def each_format(key)
281
+ check_closed
282
+ return to_enum(__method__, key) unless block
283
+
284
+ each do |r|
285
+ yield r.format(key)
286
+ end
287
+ end
135
288
  end
136
289
  end
data/lib/hts/faidx.rb CHANGED
@@ -6,40 +6,52 @@ module HTS
6
6
  class Faidx
7
7
  attr_reader :file_name
8
8
 
9
- class << self
10
- alias open new
9
+ def self.open(*args, **kw)
10
+ file = new(*args, **kw) # do not yield
11
+ return file unless block_given?
12
+
13
+ begin
14
+ yield file
15
+ ensure
16
+ file.close
17
+ end
18
+ file
11
19
  end
12
20
 
13
21
  def initialize(file_name)
22
+ if block_given?
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
+ end
26
+
14
27
  @file_name = file_name
15
28
  @fai = LibHTS.fai_load(@file_name)
16
29
 
17
- # IO like API
18
- if block_given?
19
- begin
20
- yield self
21
- ensure
22
- close
23
- end
24
- end
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
25
35
  end
26
36
 
27
37
  def close
28
38
  LibHTS.fai_destroy(@fai)
29
39
  end
30
40
 
41
+ # FIXME: This doesn't seem to work as expected
42
+ # def closed?
43
+ # @fai.null?
44
+ # end
45
+
31
46
  # the number of sequences in the index.
32
- def size
47
+ def length
33
48
  LibHTS.faidx_nseq(@fai)
34
49
  end
35
- alias length size
50
+ alias size length
36
51
 
37
52
  # return the length of the requested chromosome.
38
53
  def chrom_size(chrom)
39
- unless chrom.is_a?(String) || chrom.is_a?(Symbol)
40
- # FIXME
41
- raise ArgumentError, "Expect chrom to be String or Symbol"
42
- end
54
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
43
55
 
44
56
  chrom = chrom.to_s
45
57
  result = LibHTS.faidx_seq_len(@fai, chrom)
@@ -47,12 +59,41 @@ module HTS
47
59
  end
48
60
  alias chrom_length chrom_size
49
61
 
50
- # FIXME: naming and syntax
51
- def cget; end
62
+ # return the names of all the sequences in the index.
63
+ def chrom_names
64
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
+ end
66
+
67
+ # @overload seq(name)
68
+ # Fetch the sequence as a String.
69
+ # @param name [String] chr1:0-10
70
+ # @overload seq(name, start, stop)
71
+ # Fetch the sequence as a String.
72
+ # @param name [String] the name of the chromosome
73
+ # @param start [Integer] the start position of the sequence (0-based)
74
+ # @param stop [Integer] the end position of the sequence (0-based)
75
+ # @return [String] the sequence
76
+
77
+ def seq(name, start = nil, stop = nil)
78
+ name = name.to_s
79
+ rlen = FFI::MemoryPointer.new(:int)
52
80
 
53
- # FIXME: naming and syntax
54
- def get; end
81
+ if start.nil? && stop.nil?
82
+ result = LibHTS.fai_fetch(@fai, name, rlen)
83
+ else
84
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
55
87
 
56
- # __iter__
88
+ result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
+ end
90
+
91
+ case rlen.read_int
92
+ when -2 then raise "Invalid chromosome name: #{name}"
93
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
94
+ end
95
+
96
+ result
97
+ end
57
98
  end
58
99
  end
data/lib/hts/hts.rb CHANGED
@@ -3,7 +3,38 @@
3
3
  require_relative "../htslib"
4
4
 
5
5
  module HTS
6
+ # A base class for hts files.
6
7
  class Hts
8
+ class << self
9
+ private
10
+
11
+ def define_getter(name)
12
+ define_method(name) do
13
+ check_closed
14
+ position = tell
15
+ ary = map(&name)
16
+ seek(position)
17
+ ary
18
+ end
19
+ end
20
+
21
+ def define_iterator(name)
22
+ define_method("each_#{name}") do |&block|
23
+ check_closed
24
+ return to_enum(__method__) unless block
25
+
26
+ each do |record|
27
+ block.call(record.public_send(name))
28
+ end
29
+ self
30
+ end
31
+ end
32
+ end
33
+
34
+ def initialize(*args)
35
+ # do nothing
36
+ end
37
+
7
38
  def struct
8
39
  @hts_file
9
40
  end
@@ -12,11 +43,11 @@ module HTS
12
43
  @hts_file.to_ptr
13
44
  end
14
45
 
15
- def format
46
+ def file_format
16
47
  LibHTS.hts_get_format(@hts_file)[:format].to_s
17
48
  end
18
49
 
19
- def format_version
50
+ def file_format_version
20
51
  v = LibHTS.hts_get_format(@hts_file)[:version]
21
52
  major = v[:major]
22
53
  minor = v[:minor]
@@ -38,6 +69,21 @@ module HTS
38
69
  @hts_file.nil? || @hts_file.null?
39
70
  end
40
71
 
72
+ def set_threads(n = nil)
73
+ if n.nil?
74
+ require "etc"
75
+ n = [Etc.nprocessors - 1, 1].max
76
+ end
77
+ raise TypeError unless n.is_a?(Integer)
78
+ raise ArgumentError, "Number of threads must be positive" if n < 1
79
+
80
+ r = LibHTS.hts_set_threads(@hts_file, n)
81
+ raise "Failed to set number of threads: #{threads}" if r < 0
82
+
83
+ @nthreads = n
84
+ self
85
+ end
86
+
41
87
  def seek(offset)
42
88
  if @hts_file[:is_cram] == 1
43
89
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -70,5 +116,11 @@ module HTS
70
116
  raise "Cannot rewind: no start position"
71
117
  end
72
118
  end
119
+
120
+ private
121
+
122
+ def check_closed
123
+ raise IOError, "closed stream" if closed?
124
+ end
73
125
  end
74
126
  end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- [HFILE, :string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- [HFILE, :pointer, :size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -181,7 +181,7 @@ module HTS
181
181
  # Load BGZF index from an hFILE
182
182
  attach_function \
183
183
  :bgzf_index_load_hfile,
184
- [BGZF, HFILE, :string],
184
+ [BGZF, HFile, :string],
185
185
  :int
186
186
 
187
187
  # Save BGZF index
@@ -193,7 +193,7 @@ module HTS
193
193
  # Write a BGZF index to an hFILE
194
194
  attach_function \
195
195
  :bgzf_index_dump_hfile,
196
- [BGZF, HFILE, :string],
196
+ [BGZF, HFile, :string],
197
197
  :int
198
198
  end
199
199
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
+ # Module for working with C HTSlib.
4
5
  module LibHTS
5
6
  typedef :int64, :hts_pos_t
6
7
  typedef :pointer, :bam_plp_auto_f
@@ -24,9 +25,9 @@ module HTS
24
25
  :f, :pointer # kstream_t
25
26
  end
26
27
 
27
- # HFILE
28
+ # HFile
28
29
 
29
- class HFILE < FFI::BitStruct
30
+ class HFile < FFI::BitStruct
30
31
  layout \
31
32
  :buffer, :string,
32
33
  :begin, :string,
@@ -56,7 +57,7 @@ module HTS
56
57
  :uncompressed_block, :pointer,
57
58
  :compressed_block, :pointer,
58
59
  :cache, :pointer,
59
- :fp, HFILE.ptr,
60
+ :fp, HFile.ptr,
60
61
  :mt, :pointer,
61
62
  :idx, :pointer,
62
63
  :idx_build_otf, :int,
@@ -189,6 +190,16 @@ module HTS
189
190
  )
190
191
  end
191
192
 
193
+ class HtsReglist < FFI::Struct
194
+ layout \
195
+ :reg, :string,
196
+ :intervals, :pointer, # hts_pair_pos_t
197
+ :tid, :int,
198
+ :count, :uint32_t,
199
+ :min_beg, :hts_pos_t,
200
+ :max_end, :hts_pos_t
201
+ end
202
+
192
203
  # HtsFile
193
204
  class SamHdr < FFI::Struct
194
205
  layout \
@@ -217,7 +228,7 @@ module HTS
217
228
  union_layout(
218
229
  :bgzf, BGZF.ptr,
219
230
  :cram, :pointer, # cram_fd
220
- :hfile, HFILE.ptr
231
+ :hfile, HFile.ptr
221
232
  ),
222
233
  :state, :pointer,
223
234
  :format, HtsFormat,
@@ -263,7 +274,7 @@ module HTS
263
274
  :n_reg, :int,
264
275
  :beg, :int64,
265
276
  :end, :int64,
266
- :reg_list, :pointer,
277
+ :reg_list, :pointer, # HtsReglist.ptr,
267
278
  :curr_tid, :int,
268
279
  :curr_reg, :int,
269
280
  :curr_intv, :int,
@@ -392,6 +403,7 @@ module HTS
392
403
  :n, :int
393
404
  end
394
405
 
406
+ # Complete textual representation of a header line
395
407
  class BcfHrec < FFI::Struct
396
408
  layout \
397
409
  :type, :int,
@@ -402,21 +414,6 @@ module HTS
402
414
  :vals, :pointer
403
415
  end
404
416
 
405
- class BcfFmt < FFI::BitStruct
406
- layout \
407
- :id, :int,
408
- :n, :int,
409
- :size, :int,
410
- :type, :int,
411
- :p, :pointer, # uint8_t
412
- :p_len, :uint32,
413
- :_p_off_free, :uint32 # bit_fields
414
-
415
- bit_fields :_p_off_free,
416
- :p_off, 31,
417
- :p_free, 1
418
- end
419
-
420
417
  class BcfInfo < FFI::BitStruct
421
418
  layout \
422
419
  :key, :int,
@@ -466,6 +463,21 @@ module HTS
466
463
  :m, [:int, 3]
467
464
  end
468
465
 
466
+ class BcfFmt < FFI::BitStruct
467
+ layout \
468
+ :id, :int,
469
+ :n, :int,
470
+ :size, :int,
471
+ :type, :int,
472
+ :p, :pointer, # uint8_t
473
+ :p_len, :uint32,
474
+ :_p_off_free, :uint32 # bit_fields
475
+
476
+ bit_fields :_p_off_free,
477
+ :p_off, 31,
478
+ :p_free, 1
479
+ end
480
+
469
481
  class BcfDec < FFI::Struct
470
482
  layout \
471
483
  :m_fmt, :int,
@@ -516,5 +528,15 @@ module HTS
516
528
  LibHTS.bcf_destroy(ptr) unless ptr.null?
517
529
  end
518
530
  end
531
+
532
+ CramContentType = enum(
533
+ :ct_error, -1,
534
+ :file_header, 0,
535
+ :compression_header, 1,
536
+ :mapped_slice, 2,
537
+ :unmapped_slice, 3, # cram v1.0 only
538
+ :external, 4,
539
+ :core, 5
540
+ )
519
541
  end
520
542
  end