htslib 0.2.3 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "header_record"
4
+
3
5
  module HTS
4
6
  class Bcf < Hts
5
7
  # A class for working with VCF records.
8
+ # NOTE: This class has a lot of methods that are not stable.
9
+ # The method names and the number of arguments may change in the future.
6
10
  class Header
7
- def initialize(hts_file)
8
- @bcf_hdr = LibHTS.bcf_hdr_read(hts_file)
11
+ def initialize(arg = nil)
12
+ case arg
13
+ when LibHTS::HtsFile
14
+ @bcf_hdr = LibHTS.bcf_hdr_read(arg)
15
+ when LibHTS::BcfHdr
16
+ @bcf_hdr = arg
17
+ when nil
18
+ @bcf_hdr = LibHTS.bcf_hdr_init("w")
19
+ else
20
+ raise TypeError, "Invalid argument"
21
+ end
9
22
  end
10
23
 
11
24
  def struct
@@ -20,6 +33,10 @@ module HTS
20
33
  LibHTS.bcf_hdr_get_version(@bcf_hdr)
21
34
  end
22
35
 
36
+ def set_version(version)
37
+ LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
38
+ end
39
+
23
40
  def nsamples
24
41
  LibHTS.bcf_hdr_nsamples(@bcf_hdr)
25
42
  end
@@ -31,6 +48,45 @@ module HTS
31
48
  .map(&:read_string)
32
49
  end
33
50
 
51
+ def add_sample(sample, sync: true)
52
+ LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
53
+ self.sync if sync
54
+ end
55
+
56
+ def merge(hdr)
57
+ LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
58
+ end
59
+
60
+ def sync
61
+ LibHTS.bcf_hdr_sync(@bcf_hdr)
62
+ end
63
+
64
+ def read_bcf(fname)
65
+ LibHTS.bcf_hdr_set(@bcf_hdr, fname)
66
+ end
67
+
68
+ def append(line)
69
+ LibHTS.bcf_hdr_append(@bcf_hdr, line)
70
+ end
71
+
72
+ def delete(bcf_hl_type, key) # FIXME
73
+ type = bcf_hl_type_to_int(bcf_hl_type)
74
+ LibHTS.bcf_hdr_remove(@bcf_hdr, type, key)
75
+ end
76
+
77
+ def get_hrec(bcf_hl_type, key, value, str_class = nil)
78
+ type = bcf_hl_type_to_int(bcf_hl_type)
79
+ hrec = LibHTS.bcf_hdr_get_hrec(@bcf_hdr, type, key, value, str_class)
80
+ HeaderRecord.new(hrec)
81
+ end
82
+
83
+ def seqnames
84
+ n = FFI::MemoryPointer.new(:int)
85
+ names = LibHTS.bcf_hdr_seqnames(@bcf_hdr, n)
86
+ names.read_array_of_pointer(n.read_int)
87
+ .map(&:read_string)
88
+ end
89
+
34
90
  def to_s
35
91
  kstr = LibHTS::KString.new
36
92
  raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
@@ -40,6 +96,27 @@ module HTS
40
96
 
41
97
  private
42
98
 
99
+ def bcf_hl_type_to_int(bcf_hl_type)
100
+ return bcf_hl_type if bcf_hl_type.is_a?(Integer)
101
+
102
+ case bcf_hl_type.to_s.upcase
103
+ when "FILTER", "FIL"
104
+ LibHTS::BCF_HL_FLT
105
+ when "INFO"
106
+ LibHTS::BCF_HL_INFO
107
+ when "FORMAT", "FMT"
108
+ LibHTS::BCF_HL_FMT
109
+ when "CONTIG", "CTG"
110
+ LibHTS::BCF_HL_CTG
111
+ when "STRUCTURED", "STR"
112
+ LibHTS::BCF_HL_STR
113
+ when "GENOTYPE", "GEN"
114
+ LibHTS::BCF_HL_GEN
115
+ else
116
+ raise TypeError, "Invalid argument"
117
+ end
118
+ end
119
+
43
120
  def initialize_copy(orig)
44
121
  @bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
45
122
  end
@@ -3,9 +3,43 @@
3
3
  module HTS
4
4
  class Bcf < Hts
5
5
  class HeaderRecord
6
- def initialize
6
+ def initialize(arg = nil)
7
+ case arg
8
+ when LibHTS::BcfHrec
9
+ @bcf_hrec = arg
10
+ else
11
+ raise TypeError, "Invalid argument"
12
+ end
13
+ end
14
+
15
+ def struct
7
16
  @bcf_hrec
8
17
  end
18
+
19
+ def add_key(key)
20
+ LibHTS.bcf_hrec_add_key(@bcf_hrec, key, key.length)
21
+ end
22
+
23
+ def set_value(i, val, quote: true)
24
+ is_quoted = quote ? 1 : 0
25
+ LibHTS.bcf_hrec_set_val(@bcf_hrec, i, val, val.length, is_quoted)
26
+ end
27
+
28
+ def find_key(key)
29
+ LibHTS.bcf_hrec_find_key(@bcf_hrec, key)
30
+ end
31
+
32
+ def to_s
33
+ kstr = LibHTS::KString.new
34
+ LibHTS.bcf_hrec_format(@bcf_hrec, kstr)
35
+ kstr[:s]
36
+ end
37
+
38
+ private
39
+
40
+ def initialize_copy(orig)
41
+ @bcf_hrec = LibHTS.bcf_hrec_dup(orig.struct)
42
+ end
9
43
  end
10
44
  end
11
45
  end
data/lib/hts/bcf/info.rb CHANGED
@@ -9,31 +9,11 @@ module HTS
9
9
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
10
  end
11
11
 
12
- # For compatibility with htslib.cr.
13
- def get_int(key)
14
- get(key, :int)
15
- end
16
-
17
- # For compatibility with htslib.cr.
18
- def get_float(key)
19
- get(key, :float)
20
- end
21
-
22
- # For compatibility with htslib.cr.
23
- def get_string(key)
24
- get(key, :string)
25
- end
26
-
27
- # For compatibility with htslib.cr.
28
- def get_flag(key)
29
- get(key, :flag)
30
- end
31
-
32
- def [](key)
33
- get(key)
34
- end
35
-
36
12
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
13
+ # @note: Why is this method named "get" instead of "fetch"?
14
+ # This is for compatibility with the Crystal language
15
+ # which provides methods like `get_int`, `get_float`, etc.
16
+ # I think they are better than `fetch_int` and `fetch_float`.
37
17
  def get(key, type = nil)
38
18
  n = FFI::MemoryPointer.new(:int)
39
19
  p1 = @p1
@@ -70,6 +50,30 @@ module HTS
70
50
  end
71
51
  end
72
52
 
53
+ # For compatibility with HTS.cr.
54
+ def get_int(key)
55
+ get(key, :int)
56
+ end
57
+
58
+ # For compatibility with HTS.cr.
59
+ def get_float(key)
60
+ get(key, :float)
61
+ end
62
+
63
+ # For compatibility with HTS.cr.
64
+ def get_string(key)
65
+ get(key, :string)
66
+ end
67
+
68
+ # For compatibility with HTS.cr.
69
+ def get_flag(key)
70
+ get(key, :flag)
71
+ end
72
+
73
+ def [](key)
74
+ get(key)
75
+ end
76
+
73
77
  # FIXME: naming? room for improvement.
74
78
  def fields
75
79
  keys.map do |key|
data/lib/hts/bcf.rb CHANGED
@@ -52,10 +52,9 @@ module HTS
52
52
  build_index(index) if build_index
53
53
  @idx = load_index(index)
54
54
  @start_position = tell
55
- super # do nothing
56
55
  end
57
56
 
58
- def build_index(index_name = nil, min_shift: 14)
57
+ def build_index(index_name = nil, min_shift: 14, threads: 2)
59
58
  check_closed
60
59
 
61
60
  if index_name
@@ -63,10 +62,15 @@ module HTS
63
62
  else
64
63
  warn "Create index for #{@file_name}"
65
64
  end
66
- r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
67
- raise "Failed to build index for #{@file_name}" if r < 0
68
-
69
- self
65
+ case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
66
+ when 0 # successful
67
+ when -1 then raise "indexing failed"
68
+ when -2 then raise "opening #{@file_name} failed"
69
+ when -3 then raise "format not indexable"
70
+ when -4 then raise "failed to create and/or save the index"
71
+ else raise "unknown error"
72
+ end
73
+ self # for method chaining
70
74
  end
71
75
 
72
76
  def load_index(index_name = nil)
@@ -85,22 +89,34 @@ module HTS
85
89
  !@idx.null?
86
90
  end
87
91
 
88
- def write_header
92
+ def close
93
+ LibHTS.hts_idx_destroy(@idx) unless @idx&.null?
94
+ @idx = nil
95
+ super
96
+ end
97
+
98
+ def write_header(header)
89
99
  check_closed
90
100
 
91
101
  @header = header.dup
92
- LibHTS.hts_set_fai_filename(header, @file_name)
93
102
  LibHTS.bcf_hdr_write(@hts_file, header)
94
103
  end
95
104
 
96
- def write(var)
105
+ def header=(header)
106
+ write_header(header)
107
+ end
108
+
109
+ def write(record)
97
110
  check_closed
98
111
 
99
- var_dup = var.dup
100
- LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
112
+ # record = record.dup
113
+ r = LibHTS.bcf_write(@hts_file, header, record)
114
+ raise "Failed to write record" if r < 0
101
115
  end
102
116
 
103
- # Close the current file.
117
+ def <<(var)
118
+ write(var)
119
+ end
104
120
 
105
121
  def nsamples
106
122
  check_closed
@@ -122,29 +138,6 @@ module HTS
122
138
  end
123
139
  end
124
140
 
125
- private def each_record_copy
126
- check_closed
127
-
128
- return to_enum(__method__) unless block_given?
129
-
130
- while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
131
- record = Record.new(bcf1, header)
132
- yield record
133
- end
134
- self
135
- end
136
-
137
- private def each_record_reuse
138
- check_closed
139
-
140
- return to_enum(__method__) unless block_given?
141
-
142
- bcf1 = LibHTS.bcf_init
143
- record = Record.new(bcf1, header)
144
- yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
145
- self
146
- end
147
-
148
141
  def query(...)
149
142
  querys(...) # Fixme
150
143
  end
@@ -166,55 +159,6 @@ module HTS
166
159
  # private def queryi_reuse
167
160
  # end
168
161
 
169
- private def querys_copy(region)
170
- check_closed
171
-
172
- raise "query is only available for BCF files" unless file_format == "bcf"
173
- raise "Index file is required to call the query method." unless index_loaded?
174
- return to_enum(__method__, region) unless block_given?
175
-
176
- qitr = LibHTS.bcf_itr_querys(@idx, header, region)
177
-
178
- begin
179
- loop do
180
- bcf1 = LibHTS.bcf_init
181
- slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
182
- break if slen == -1
183
- raise if slen < -1
184
-
185
- yield Record.new(bcf1, header)
186
- end
187
- ensure
188
- LibHTS.bcf_itr_destroy(qitr)
189
- end
190
- self
191
- end
192
-
193
- private def querys_reuse(region)
194
- check_closed
195
-
196
- raise "query is only available for BCF files" unless file_format == "bcf"
197
- raise "Index file is required to call the query method." unless index_loaded?
198
- return to_enum(__method__, region) unless block_given?
199
-
200
- qitr = LibHTS.bcf_itr_querys(@idx, header, region)
201
-
202
- bcf1 = LibHTS.bcf_init
203
- record = Record.new(bcf1, header)
204
- begin
205
- loop do
206
- slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
207
- break if slen == -1
208
- raise if slen < -1
209
-
210
- yield record
211
- end
212
- ensure
213
- LibHTS.bcf_itr_destroy(qitr)
214
- end
215
- self
216
- end
217
-
218
162
  # @!macro [attach] define_getter
219
163
  # @method $1
220
164
  # Get $1 array
@@ -231,13 +175,13 @@ module HTS
231
175
  def info(key = nil)
232
176
  check_closed
233
177
  position = tell
234
- if key
235
- ary = map { |r| r.info(key) }
236
- else
237
- raise NotImplementedError
238
- # ary = each_copy.map { |r| r.info }
239
- # ary = map { |r| r.info.clone }
240
- end
178
+ raise NotImplementedError unless key
179
+
180
+ ary = map { |r| r.info(key) }
181
+
182
+ # ary = each_copy.map { |r| r.info }
183
+ # ary = map { |r| r.info.clone }
184
+
241
185
  seek(position)
242
186
  ary
243
187
  end
@@ -245,13 +189,13 @@ module HTS
245
189
  def format(key = nil)
246
190
  check_closed
247
191
  position = tell
248
- if key
249
- ary = map { |r| r.format(key) }
250
- else
251
- raise NotImplementedError
252
- # ary = each_copy.map { |r| r.format }
253
- # ary = map { |r| r.format.clone }
254
- end
192
+ raise NotImplementedError unless key
193
+
194
+ ary = map { |r| r.format(key) }
195
+
196
+ # ary = each_copy.map { |r| r.format }
197
+ # ary = map { |r| r.format.clone }
198
+
255
199
  seek(position)
256
200
  ary
257
201
  end
@@ -285,5 +229,81 @@ module HTS
285
229
  yield r.format(key)
286
230
  end
287
231
  end
232
+
233
+ private
234
+
235
+ def querys_reuse(region)
236
+ check_closed
237
+
238
+ raise "query is only available for BCF files" unless file_format == "bcf"
239
+ raise "Index file is required to call the query method." unless index_loaded?
240
+ return to_enum(__method__, region) unless block_given?
241
+
242
+ qiter = LibHTS.bcf_itr_querys(@idx, header, region)
243
+ raise "Failed to query region #{region}" if qiter.null?
244
+
245
+ bcf1 = LibHTS.bcf_init
246
+ record = Record.new(bcf1, header)
247
+ begin
248
+ loop do
249
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
250
+ break if slen == -1
251
+ raise if slen < -1
252
+
253
+ yield record
254
+ end
255
+ ensure
256
+ LibHTS.bcf_itr_destroy(qiter)
257
+ end
258
+ self
259
+ end
260
+
261
+ def querys_copy(region)
262
+ check_closed
263
+
264
+ raise "query is only available for BCF files" unless file_format == "bcf"
265
+ raise "Index file is required to call the query method." unless index_loaded?
266
+ return to_enum(__method__, region) unless block_given?
267
+
268
+ qiter = LibHTS.bcf_itr_querys(@idx, header, region)
269
+ raise "Failed to query region #{region}" if qiter.null?
270
+
271
+ begin
272
+ loop do
273
+ bcf1 = LibHTS.bcf_init
274
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
275
+ break if slen == -1
276
+ raise if slen < -1
277
+
278
+ yield Record.new(bcf1, header)
279
+ end
280
+ ensure
281
+ LibHTS.bcf_itr_destroy(qiter)
282
+ end
283
+ self
284
+ end
285
+
286
+ def each_record_reuse
287
+ check_closed
288
+
289
+ return to_enum(__method__) unless block_given?
290
+
291
+ bcf1 = LibHTS.bcf_init
292
+ record = Record.new(bcf1, header)
293
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
294
+ self
295
+ end
296
+
297
+ def each_record_copy
298
+ check_closed
299
+
300
+ return to_enum(__method__) unless block_given?
301
+
302
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
303
+ record = Record.new(bcf1, header)
304
+ yield record
305
+ end
306
+ self
307
+ end
288
308
  end
289
309
  end
@@ -0,0 +1,64 @@
1
+ require_relative "../faidx"
2
+
3
+ module HTS
4
+ class Faidx
5
+ class Sequence
6
+ attr_reader :name, :faidx
7
+
8
+ def initialize(faidx, name)
9
+ raise unless faidx.has_key?(name)
10
+
11
+ @faidx = faidx
12
+ @name = name
13
+ end
14
+
15
+ def length
16
+ faidx.seq_len(name)
17
+ end
18
+ alias size length
19
+
20
+ def seq(start = nil, stop = nil)
21
+ faidx.seq(name, start, stop)
22
+ end
23
+
24
+ def qual(start = nil, stop = nil)
25
+ faidx.qual(name, start, stop)
26
+ end
27
+
28
+ def [](arg)
29
+ case arg
30
+ when Integer
31
+ if arg >= 0
32
+ start = arg
33
+ stop = arg
34
+ else
35
+ start = length + arg
36
+ stop = length + arg
37
+ end
38
+ when Range
39
+ arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
40
+ arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
41
+ if arg.begin.nil?
42
+ if arg.end.nil?
43
+ start = nil
44
+ stop = nil
45
+ else
46
+ start = 0
47
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
48
+ end
49
+ elsif arg.end.nil?
50
+ # always include the first base
51
+ start = arg.begin
52
+ stop = length - 1
53
+ else
54
+ start = arg.begin
55
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
56
+ end
57
+ else
58
+ raise ArgumentError
59
+ end
60
+ seq(start, stop)
61
+ end
62
+ end
63
+ end
64
+ end
data/lib/hts/faidx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../htslib"
4
+ require_relative "faidx/sequence"
4
5
 
5
6
  module HTS
6
7
  class Faidx
@@ -25,7 +26,11 @@ module HTS
25
26
  end
26
27
 
27
28
  @file_name = file_name
28
- @fai = LibHTS.fai_load(@file_name)
29
+ @fai = if [".fq", ".fastq"].include? File.extname(@file_name)
30
+ LibHTS.fai_load_format(@file_name, 2)
31
+ else
32
+ LibHTS.fai_load(@file_name)
33
+ end
29
34
 
30
35
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
36
  end
@@ -38,10 +43,9 @@ module HTS
38
43
  LibHTS.fai_destroy(@fai)
39
44
  end
40
45
 
41
- # FIXME: This doesn't seem to work as expected
42
- # def closed?
43
- # @fai.null?
44
- # end
46
+ def file_format
47
+ @fai[:format]
48
+ end
45
49
 
46
50
  # the number of sequences in the index.
47
51
  def length
@@ -50,31 +54,48 @@ module HTS
50
54
  alias size length
51
55
 
52
56
  # return the length of the requested chromosome.
53
- def chrom_size(chrom)
57
+ def names
58
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
59
+ end
60
+
61
+ alias keys names
62
+
63
+ def has_key?(key)
64
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
65
+
66
+ key = key.to_s
67
+ case LibHTS.faidx_has_seq(@fai, key)
68
+ when 1 then true
69
+ when 0 then false
70
+ else raise
71
+ end
72
+ end
73
+
74
+ def [](name)
75
+ name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
76
+ Sequence.new(self, name)
77
+ end
78
+
79
+ # return the length of the requested chromosome.
80
+ def seq_len(chrom)
54
81
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
55
82
 
56
83
  chrom = chrom.to_s
57
84
  result = LibHTS.faidx_seq_len(@fai, chrom)
58
85
  result == -1 ? nil : result
59
86
  end
60
- alias chrom_length chrom_size
61
87
 
62
- # return the length of the requested chromosome.
63
- def chrom_names
64
- Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
- end
66
-
67
- # @overload fetch(name)
88
+ # @overload seq(name)
68
89
  # Fetch the sequence as a String.
69
90
  # @param name [String] chr1:0-10
70
- # @overload fetch(name, start, stop)
91
+ # @overload seq(name, start, stop)
71
92
  # Fetch the sequence as a String.
72
93
  # @param name [String] the name of the chromosome
73
94
  # @param start [Integer] the start position of the sequence (0-based)
74
95
  # @param stop [Integer] the end position of the sequence (0-based)
75
96
  # @return [String] the sequence
76
97
 
77
- def seq(name, start = nil, stop = nil)
98
+ def fetch_seq(name, start = nil, stop = nil)
78
99
  name = name.to_s
79
100
  rlen = FFI::MemoryPointer.new(:int)
80
101
 
@@ -84,6 +105,7 @@ module HTS
84
105
  start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
106
  stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
107
  start > stop && raise(ArgumentError, "Expect start to be <= stop")
108
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
87
109
 
88
110
  result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
111
  end
@@ -95,5 +117,32 @@ module HTS
95
117
 
96
118
  result
97
119
  end
120
+
121
+ alias seq fetch_seq
122
+
123
+ def fetch_qual(name, start = nil, stop = nil)
124
+ name = name.to_s
125
+ rlen = FFI::MemoryPointer.new(:int)
126
+
127
+ if start.nil? && stop.nil?
128
+ result = LibHTS.fai_fetchqual(@fai, name, rlen)
129
+ else
130
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
131
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
132
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
133
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
134
+
135
+ result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
136
+ end
137
+
138
+ case rlen.read_int
139
+ when -2 then raise "Invalid chromosome name: #{name}"
140
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
141
+ end
142
+
143
+ result
144
+ end
145
+
146
+ alias qual fetch_qual
98
147
  end
99
148
  end