htslib 0.1.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/hts/bcf.rb CHANGED
@@ -9,10 +9,11 @@ require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
+ # A class for working with VCF, BCF files.
12
13
  class Bcf < Hts
13
14
  include Enumerable
14
15
 
15
- attr_reader :file_name, :index_name, :mode, :header
16
+ attr_reader :file_name, :index_name, :mode, :header, :nthreads
16
17
 
17
18
  def self.open(*args, **kw)
18
19
  file = new(*args, **kw) # do not yield
@@ -26,8 +27,8 @@ module HTS
26
27
  file
27
28
  end
28
29
 
29
- def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
- create_index: false)
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
31
+ build_index: false)
31
32
  if block_given?
32
33
  message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
33
34
  raise message
@@ -38,36 +39,39 @@ module HTS
38
39
  @file_name = file_name
39
40
  @index_name = index
40
41
  @mode = mode
42
+ @nthreads = threads
41
43
  @hts_file = LibHTS.hts_open(@file_name, mode)
42
44
 
43
45
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
44
46
 
45
- if threads&.> 0
46
- r = LibHTS.hts_set_threads(@hts_file, threads)
47
- raise "Failed to set number of threads: #{threads}" if r < 0
48
- end
47
+ set_threads(threads) if threads
49
48
 
50
49
  return if @mode[0] == "w"
51
50
 
52
51
  @header = Bcf::Header.new(@hts_file)
53
-
54
- create_index(index) if create_index
55
-
52
+ build_index(index) if build_index
56
53
  @idx = load_index(index)
57
-
58
54
  @start_position = tell
55
+ super # do nothing
59
56
  end
60
57
 
61
- def create_index(index_name = nil)
62
- warn "Create index for #{@file_name} to #{index_name}"
63
- if index
64
- LibHTS.bcf_index_build2(@hts_file, index_name, -1)
58
+ def build_index(index_name = nil, min_shift: 14)
59
+ check_closed
60
+
61
+ if index_name
62
+ warn "Create index for #{@file_name} to #{index_name}"
65
63
  else
66
- LibHTS.bcf_index_build(@hts_file, -1)
64
+ warn "Create index for #{@file_name}"
67
65
  end
66
+ r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
67
+ raise "Failed to build index for #{@file_name}" if r < 0
68
+
69
+ self
68
70
  end
69
71
 
70
72
  def load_index(index_name = nil)
73
+ check_closed
74
+
71
75
  if index_name
72
76
  LibHTS.bcf_index_load2(@file_name, index_name)
73
77
  else
@@ -76,39 +80,50 @@ module HTS
76
80
  end
77
81
 
78
82
  def index_loaded?
83
+ check_closed
84
+
79
85
  !@idx.null?
80
86
  end
81
87
 
82
88
  def write_header
83
- raise IOError, "closed stream" if closed?
89
+ check_closed
84
90
 
85
91
  @header = header.dup
86
92
  LibHTS.hts_set_fai_filename(header, @file_name)
87
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
93
+ LibHTS.bcf_hdr_write(@hts_file, header)
88
94
  end
89
95
 
90
96
  def write(var)
91
- raise IOError, "closed stream" if closed?
97
+ check_closed
92
98
 
93
- var_dup = var.dup = var.dup
99
+ var_dup = var.dup
94
100
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
95
101
  end
96
102
 
97
103
  # Close the current file.
98
104
 
99
105
  def nsamples
106
+ check_closed
107
+
100
108
  header.nsamples
101
109
  end
102
110
 
103
111
  def samples
112
+ check_closed
113
+
104
114
  header.samples
105
115
  end
106
116
 
107
- # Iterate over each record.
108
- # Generate a new Record object each time.
109
- # Slower than each.
110
- def each_copy
111
- raise IOError, "closed stream" if closed?
117
# Iterate over the records of the file.
#
# @param copy [Boolean] when truthy, iteration is delegated to
#   each_record_copy; otherwise to each_record_reuse
# @return whatever the selected helper returns (the block, if any, is
#   forwarded to it unchanged)
def each(copy: false, &block)
  return each_record_copy(&block) if copy

  each_record_reuse(&block)
end
124
+
125
+ private def each_record_copy
126
+ check_closed
112
127
 
113
128
  return to_enum(__method__) unless block_given?
114
129
 
@@ -119,11 +134,8 @@ module HTS
119
134
  self
120
135
  end
121
136
 
122
- # Iterate over each record.
123
- # Record object is reused.
124
- # Faster than each_copy.
125
- def each
126
- raise IOError, "closed stream" if closed?
137
+ private def each_record_reuse
138
+ check_closed
127
139
 
128
140
  return to_enum(__method__) unless block_given?
129
141
 
@@ -132,5 +144,146 @@ module HTS
132
144
  yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
133
145
  self
134
146
  end
147
+
148
+ def query(...)
149
+ querys(...) # Fixme
150
+ end
151
+
152
+ # def queryi
153
+ # end
154
+
155
# Iterate over the records located in +region+.
#
# @param region region specification, forwarded unchanged to the helper
# @param copy [Boolean] when truthy, the work is delegated to querys_copy;
#   otherwise to querys_reuse
# @return whatever the selected helper returns (the block, if any, is
#   forwarded to it unchanged)
def querys(region, copy: false, &block)
  return querys_copy(region, &block) if copy

  querys_reuse(region, &block)
end
162
+
163
+ # private def queryi_copy
164
+ # end
165
+
166
+ # private def queryi_reuse
167
+ # end
168
+
169
# Iterate over the records located in +region+, yielding a new Record
# (built on a fresh LibHTS.bcf_init allocation) for every record read.
#
# The format and index checks run before the block check, so they raise
# even when no block is given.
#
# @param region region specification passed to LibHTS.bcf_itr_querys
# @yield [Record] one newly created Record per record read
# @return [Enumerator] when no block is given
# @return [self] when a block is given
# @raise [IOError] via check_closed when the file is closed
# @raise [RuntimeError] unless file_format is "bcf", when no index is
#   loaded, or when hts_itr_next reports an error (a value below -1)
private def querys_copy(region)
  check_closed

  raise "query is only available for BCF files" unless file_format == "bcf"
  raise "Index file is required to call the query method." unless index_loaded?
  return to_enum(__method__, region) unless block_given?

  qitr = LibHTS.bcf_itr_querys(@idx, header, region)

  begin
    loop do
      bcf1 = LibHTS.bcf_init
      slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
      # -1 ends the iteration; anything lower is treated as a read error.
      break if slen == -1
      # A bare `raise` here would only say "unhandled exception".
      raise "Failed to read record for region #{region} (hts_itr_next returned #{slen})" if slen < -1

      yield Record.new(bcf1, header)
    end
  ensure
    # Release the iterator even if the block or the read raised.
    LibHTS.bcf_itr_destroy(qitr)
  end
  self
end
192
+
193
# Iterate over the records located in +region+, yielding the same Record
# object every time; its underlying bcf1 buffer is refilled on each read.
#
# The format and index checks run before the block check, so they raise
# even when no block is given.
#
# @param region region specification passed to LibHTS.bcf_itr_querys
# @yield [Record] the single, reused Record instance
# @return [Enumerator] when no block is given
# @return [self] when a block is given
# @raise [IOError] via check_closed when the file is closed
# @raise [RuntimeError] unless file_format is "bcf", when no index is
#   loaded, or when hts_itr_next reports an error (a value below -1)
private def querys_reuse(region)
  check_closed

  raise "query is only available for BCF files" unless file_format == "bcf"
  raise "Index file is required to call the query method." unless index_loaded?
  return to_enum(__method__, region) unless block_given?

  qitr = LibHTS.bcf_itr_querys(@idx, header, region)

  # One buffer and one wrapper are allocated up front and reused.
  bcf1 = LibHTS.bcf_init
  record = Record.new(bcf1, header)
  begin
    loop do
      slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
      # -1 ends the iteration; anything lower is treated as a read error.
      break if slen == -1
      # A bare `raise` here would only say "unhandled exception".
      raise "Failed to read record for region #{region} (hts_itr_next returned #{slen})" if slen < -1

      yield record
    end
  ensure
    # Release the iterator even if the block or the read raised.
    LibHTS.bcf_itr_destroy(qitr)
  end
  self
end
217
+
218
+ # @!macro [attach] define_getter
219
+ # @method $1
220
+ # Get $1 array
221
+ # @return [Array] the $1 array
222
+ define_getter :chrom
223
+ define_getter :pos
224
+ define_getter :endpos
225
+ define_getter :id
226
+ define_getter :ref
227
+ define_getter :alt
228
+ define_getter :qual
229
+ define_getter :filter
230
+
231
# Collect the value of one INFO key from every record.
#
# The current offset (from tell) is remembered before the scan and
# restored with seek afterwards.
#
# @param key INFO key forwarded to each record's `info` method
# @return [Array] one entry per record
# @raise [NotImplementedError] when no key is given
def info(key = nil)
  check_closed
  saved_offset = tell
  # Collecting every INFO field at once (no key) is not supported yet.
  raise NotImplementedError unless key

  values = map { |record| record.info(key) }
  seek(saved_offset)
  values
end
244
+
245
# Collect the value of one FORMAT key from every record.
#
# The current offset (from tell) is remembered before the scan and
# restored with seek afterwards.
#
# @param key FORMAT key forwarded to each record's `format` method
# @return [Array] one entry per record
# @raise [NotImplementedError] when no key is given
def format(key = nil)
  check_closed
  saved_offset = tell
  # Collecting every FORMAT field at once (no key) is not supported yet.
  raise NotImplementedError unless key

  values = map { |record| record.format(key) }
  seek(saved_offset)
  values
end
258
+
259
+ # @!macro [attach] define_iterator
260
+ # @method each_$1
261
+ # Get $1 iterator
262
+ define_iterator :chrom
263
+ define_iterator :pos
264
+ define_iterator :endpos
265
+ define_iterator :id
266
+ define_iterator :ref
267
+ define_iterator :alt
268
+ define_iterator :qual
269
+ define_iterator :filter
270
+
271
# Iterate over the records, yielding the value of one INFO key per record.
#
# @param key INFO key forwarded to each record's `info` method
# @yield the result of `record.info(key)` for every record
# @return [Enumerator] when no block is given; otherwise the value
#   returned by `each`
# @raise [IOError] via check_closed when the file is closed
def each_info(key)
  check_closed
  # This method has no `&block` parameter, so the block must be detected
  # with block_given?; a bare `block` raises NameError.
  return to_enum(__method__, key) unless block_given?

  each { |record| yield record.info(key) }
end
279
+
280
# Iterate over the records, yielding the value of one FORMAT key per record.
#
# @param key FORMAT key forwarded to each record's `format` method
# @yield the result of `record.format(key)` for every record
# @return [Enumerator] when no block is given; otherwise the value
#   returned by `each`
# @raise [IOError] via check_closed when the file is closed
def each_format(key)
  check_closed
  # This method has no `&block` parameter, so the block must be detected
  # with block_given?; a bare `block` raises NameError.
  return to_enum(__method__, key) unless block_given?

  each { |record| yield record.format(key) }
end
135
288
  end
136
289
  end
data/lib/hts/faidx.rb CHANGED
@@ -6,40 +6,52 @@ module HTS
6
6
  class Faidx
7
7
  attr_reader :file_name
8
8
 
9
- class << self
10
- alias open new
9
+ def self.open(*args, **kw)
10
+ file = new(*args, **kw) # do not yield
11
+ return file unless block_given?
12
+
13
+ begin
14
+ yield file
15
+ ensure
16
+ file.close
17
+ end
18
+ file
11
19
  end
12
20
 
13
21
  def initialize(file_name)
22
+ if block_given?
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
+ end
26
+
14
27
  @file_name = file_name
15
28
  @fai = LibHTS.fai_load(@file_name)
16
29
 
17
- # IO like API
18
- if block_given?
19
- begin
20
- yield self
21
- ensure
22
- close
23
- end
24
- end
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
25
35
  end
26
36
 
27
37
  def close
28
38
  LibHTS.fai_destroy(@fai)
29
39
  end
30
40
 
41
+ # FIXME: This doesn't seem to work as expected
42
+ # def closed?
43
+ # @fai.null?
44
+ # end
45
+
31
46
  # the number of sequences in the index.
32
- def size
47
+ def length
33
48
  LibHTS.faidx_nseq(@fai)
34
49
  end
35
- alias length size
50
+ alias size length
36
51
 
37
52
  # return the length of the requested chromosome.
38
53
  def chrom_size(chrom)
39
- unless chrom.is_a?(String) || chrom.is_a?(Symbol)
40
- # FIXME
41
- raise ArgumentError, "Expect chrom to be String or Symbol"
42
- end
54
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
43
55
 
44
56
  chrom = chrom.to_s
45
57
  result = LibHTS.faidx_seq_len(@fai, chrom)
@@ -47,12 +59,41 @@ module HTS
47
59
  end
48
60
  alias chrom_length chrom_size
49
61
 
50
- # FIXME: naming and syntax
51
- def cget; end
62
+ # return the names of all sequences in the index.
63
+ def chrom_names
64
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
+ end
66
+
67
+ # @overload seq(name)
68
+ # Fetch the sequence as a String.
69
+ # @param name [String] chr1:0-10
70
+ # @overload seq(name, start, stop)
71
+ # Fetch the sequence as a String.
72
+ # @param name [String] the name of the chromosome
73
+ # @param start [Integer] the start position of the sequence (0-based)
74
+ # @param stop [Integer] the end position of the sequence (0-based)
75
+ # @return [String] the sequence
76
+
77
+ def seq(name, start = nil, stop = nil)
78
+ name = name.to_s
79
+ rlen = FFI::MemoryPointer.new(:int)
52
80
 
53
- # FIXME: naming and syntax
54
- def get; end
81
+ if start.nil? && stop.nil?
82
+ result = LibHTS.fai_fetch(@fai, name, rlen)
83
+ else
84
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
55
87
 
56
- # __iter__
88
+ result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
+ end
90
+
91
+ case rlen.read_int
92
+ when -2 then raise "Invalid chromosome name: #{name}"
93
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
94
+ end
95
+
96
+ result
97
+ end
57
98
  end
58
99
  end
data/lib/hts/hts.rb CHANGED
@@ -3,7 +3,38 @@
3
3
  require_relative "../htslib"
4
4
 
5
5
  module HTS
6
+ # A base class for hts files.
6
7
  class Hts
8
class << self
  private

  # Define an instance method +name+ that returns an Array with the
  # result of calling +name+ on every record. The offset reported by
  # tell is captured before the scan and restored with seek afterwards.
  def define_getter(name)
    define_method(name) do
      check_closed
      saved_offset = tell
      map(&name).tap { seek(saved_offset) }
    end
  end

  # Define an instance method each_<name> that yields the result of
  # calling +name+ on every record. Without a block it returns an
  # Enumerator; with a block it returns self.
  def define_iterator(name)
    define_method("each_#{name}") do |&consumer|
      check_closed
      return to_enum(__method__) unless consumer

      each { |record| consumer.call(record.public_send(name)) }
      self
    end
  end
end
33
+
34
+ def initialize(*args)
35
+ # do nothing
36
+ end
37
+
7
38
  def struct
8
39
  @hts_file
9
40
  end
@@ -12,11 +43,11 @@ module HTS
12
43
  @hts_file.to_ptr
13
44
  end
14
45
 
15
- def format
46
+ def file_format
16
47
  LibHTS.hts_get_format(@hts_file)[:format].to_s
17
48
  end
18
49
 
19
- def format_version
50
+ def file_format_version
20
51
  v = LibHTS.hts_get_format(@hts_file)[:version]
21
52
  major = v[:major]
22
53
  minor = v[:minor]
@@ -38,6 +69,21 @@ module HTS
38
69
  @hts_file.nil? || @hts_file.null?
39
70
  end
40
71
 
72
# Ask htslib (through LibHTS.hts_set_threads) to use +n+ threads for this
# file handle and remember the value in @nthreads.
#
# @param n [Integer, nil] number of threads; when nil, one less than
#   Etc.nprocessors is used, with a minimum of 1
# @return [self]
# @raise [TypeError] if n is not an Integer
# @raise [ArgumentError] if n is less than 1
# @raise [RuntimeError] if hts_set_threads returns a negative value
def set_threads(n = nil)
  if n.nil?
    require "etc"
    n = [Etc.nprocessors - 1, 1].max
  end
  raise TypeError unless n.is_a?(Integer)
  raise ArgumentError, "Number of threads must be positive" if n < 1

  status = LibHTS.hts_set_threads(@hts_file, n)
  # The parameter is `n`; there is no `threads` in this scope, so
  # interpolating it raised NameError instead of this error.
  raise "Failed to set number of threads: #{n}" if status < 0

  @nthreads = n
  self
end
86
+
41
87
  def seek(offset)
42
88
  if @hts_file[:is_cram] == 1
43
89
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -70,5 +116,11 @@ module HTS
70
116
  raise "Cannot rewind: no start position"
71
117
  end
72
118
  end
119
+
120
+ private
121
+
122
+ def check_closed
123
+ raise IOError, "closed stream" if closed?
124
+ end
73
125
  end
74
126
  end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- [HFILE, :string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- [HFILE, :pointer, :size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -181,7 +181,7 @@ module HTS
181
181
  # Load BGZF index from an hFILE
182
182
  attach_function \
183
183
  :bgzf_index_load_hfile,
184
- [BGZF, HFILE, :string],
184
+ [BGZF, HFile, :string],
185
185
  :int
186
186
 
187
187
  # Save BGZF index
@@ -193,7 +193,7 @@ module HTS
193
193
  # Write a BGZF index to an hFILE
194
194
  attach_function \
195
195
  :bgzf_index_dump_hfile,
196
- [BGZF, HFILE, :string],
196
+ [BGZF, HFile, :string],
197
197
  :int
198
198
  end
199
199
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
+ # Module for working with C HTSlib.
4
5
  module LibHTS
5
6
  typedef :int64, :hts_pos_t
6
7
  typedef :pointer, :bam_plp_auto_f
@@ -24,9 +25,9 @@ module HTS
24
25
  :f, :pointer # kstream_t
25
26
  end
26
27
 
27
- # HFILE
28
+ # HFile
28
29
 
29
- class HFILE < FFI::BitStruct
30
+ class HFile < FFI::BitStruct
30
31
  layout \
31
32
  :buffer, :string,
32
33
  :begin, :string,
@@ -56,7 +57,7 @@ module HTS
56
57
  :uncompressed_block, :pointer,
57
58
  :compressed_block, :pointer,
58
59
  :cache, :pointer,
59
- :fp, HFILE.ptr,
60
+ :fp, HFile.ptr,
60
61
  :mt, :pointer,
61
62
  :idx, :pointer,
62
63
  :idx_build_otf, :int,
@@ -189,6 +190,16 @@ module HTS
189
190
  )
190
191
  end
191
192
 
193
+ class HtsReglist < FFI::Struct
194
+ layout \
195
+ :reg, :string,
196
+ :intervals, :pointer, # hts_pair_pos_t
197
+ :tid, :int,
198
+ :count, :uint32_t,
199
+ :min_beg, :hts_pos_t,
200
+ :max_end, :hts_pos_t
201
+ end
202
+
192
203
  # HtsFile
193
204
  class SamHdr < FFI::Struct
194
205
  layout \
@@ -217,7 +228,7 @@ module HTS
217
228
  union_layout(
218
229
  :bgzf, BGZF.ptr,
219
230
  :cram, :pointer, # cram_fd
220
- :hfile, HFILE.ptr
231
+ :hfile, HFile.ptr
221
232
  ),
222
233
  :state, :pointer,
223
234
  :format, HtsFormat,
@@ -263,7 +274,7 @@ module HTS
263
274
  :n_reg, :int,
264
275
  :beg, :int64,
265
276
  :end, :int64,
266
- :reg_list, :pointer,
277
+ :reg_list, :pointer, # HtsReglist.ptr,
267
278
  :curr_tid, :int,
268
279
  :curr_reg, :int,
269
280
  :curr_intv, :int,
@@ -392,6 +403,7 @@ module HTS
392
403
  :n, :int
393
404
  end
394
405
 
406
+ # Complete textual representation of a header line
395
407
  class BcfHrec < FFI::Struct
396
408
  layout \
397
409
  :type, :int,
@@ -402,21 +414,6 @@ module HTS
402
414
  :vals, :pointer
403
415
  end
404
416
 
405
- class BcfFmt < FFI::BitStruct
406
- layout \
407
- :id, :int,
408
- :n, :int,
409
- :size, :int,
410
- :type, :int,
411
- :p, :pointer, # uint8_t
412
- :p_len, :uint32,
413
- :_p_off_free, :uint32 # bit_fields
414
-
415
- bit_fields :_p_off_free,
416
- :p_off, 31,
417
- :p_free, 1
418
- end
419
-
420
417
  class BcfInfo < FFI::BitStruct
421
418
  layout \
422
419
  :key, :int,
@@ -466,6 +463,21 @@ module HTS
466
463
  :m, [:int, 3]
467
464
  end
468
465
 
466
+ class BcfFmt < FFI::BitStruct
467
+ layout \
468
+ :id, :int,
469
+ :n, :int,
470
+ :size, :int,
471
+ :type, :int,
472
+ :p, :pointer, # uint8_t
473
+ :p_len, :uint32,
474
+ :_p_off_free, :uint32 # bit_fields
475
+
476
+ bit_fields :_p_off_free,
477
+ :p_off, 31,
478
+ :p_free, 1
479
+ end
480
+
469
481
  class BcfDec < FFI::Struct
470
482
  layout \
471
483
  :m_fmt, :int,
@@ -516,5 +528,15 @@ module HTS
516
528
  LibHTS.bcf_destroy(ptr) unless ptr.null?
517
529
  end
518
530
  end
531
+
532
+ CramContentType = enum(
533
+ :ct_error, -1,
534
+ :file_header, 0,
535
+ :compression_header, 1,
536
+ :mapped_slice, 2,
537
+ :unmapped_slice, 3, # cram v1.0 only
538
+ :external, 4,
539
+ :core, 5
540
+ )
519
541
  end
520
542
  end