htslib 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "header_record"
4
+
3
5
  module HTS
4
6
  class Bcf < Hts
5
7
  # A class for working with VCF records.
8
+ # NOTE: This class has a lot of methods that are not stable.
9
+ # The method names and the number of arguments may change in the future.
6
10
  class Header
7
- def initialize(hts_file)
8
- @bcf_hdr = LibHTS.bcf_hdr_read(hts_file)
11
+ def initialize(arg = nil)
12
+ case arg
13
+ when LibHTS::HtsFile
14
+ @bcf_hdr = LibHTS.bcf_hdr_read(arg)
15
+ when LibHTS::BcfHdr
16
+ @bcf_hdr = arg
17
+ when nil
18
+ @bcf_hdr = LibHTS.bcf_hdr_init("w")
19
+ else
20
+ raise TypeError, "Invalid argument"
21
+ end
9
22
  end
10
23
 
11
24
  def struct
@@ -20,6 +33,10 @@ module HTS
20
33
  LibHTS.bcf_hdr_get_version(@bcf_hdr)
21
34
  end
22
35
 
36
+ def set_version(version)
37
+ LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
38
+ end
39
+
23
40
  def nsamples
24
41
  LibHTS.bcf_hdr_nsamples(@bcf_hdr)
25
42
  end
@@ -31,6 +48,45 @@ module HTS
31
48
  .map(&:read_string)
32
49
  end
33
50
 
51
+ def add_sample(sample, sync: true)
52
+ LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
53
+ self.sync if sync
54
+ end
55
+
56
+ def merge(hdr)
57
+ LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
58
+ end
59
+
60
+ def sync
61
+ LibHTS.bcf_hdr_sync(@bcf_hdr)
62
+ end
63
+
64
+ def read_bcf(fname)
65
+ LibHTS.bcf_hdr_set(@bcf_hdr, fname)
66
+ end
67
+
68
+ def append(line)
69
+ LibHTS.bcf_hdr_append(@bcf_hdr, line)
70
+ end
71
+
72
+ def delete(bcf_hl_type, key) # FIXME
73
+ type = bcf_hl_type_to_int(bcf_hl_type)
74
+ LibHTS.bcf_hdr_remove(@bcf_hdr, type, key)
75
+ end
76
+
77
+ def get_hrec(bcf_hl_type, key, value, str_class = nil)
78
+ type = bcf_hl_type_to_int(bcf_hl_type)
79
+ hrec = LibHTS.bcf_hdr_get_hrec(@bcf_hdr, type, key, value, str_class)
80
+ HeaderRecord.new(hrec)
81
+ end
82
+
83
+ def seqnames
84
+ n = FFI::MemoryPointer.new(:int)
85
+ names = LibHTS.bcf_hdr_seqnames(@bcf_hdr, n)
86
+ names.read_array_of_pointer(n.read_int)
87
+ .map(&:read_string)
88
+ end
89
+
34
90
  def to_s
35
91
  kstr = LibHTS::KString.new
36
92
  raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
@@ -40,6 +96,27 @@ module HTS
40
96
 
41
97
  private
42
98
 
99
# Convert a header line type into the matching LibHTS::BCF_HL_* constant.
#
# Integers are returned unchanged. Any other value is converted with
# `to_s`, upcased, and matched against either the full type name or its
# abbreviation. The abbreviations follow the suffixes of the BCF_HL_*
# constants (FLT, FMT, CTG, STR, GEN); "FIL" is also accepted for FILTER.
#
# @param bcf_hl_type [Integer, String, Symbol] header line type
# @return the given Integer, or the matching LibHTS::BCF_HL_* constant
# @raise [TypeError] if the name is not one of the recognized types
def bcf_hl_type_to_int(bcf_hl_type)
  return bcf_hl_type if bcf_hl_type.is_a?(Integer)

  case bcf_hl_type.to_s.upcase
  when "FILTER", "FLT", "FIL"
    LibHTS::BCF_HL_FLT
  when "INFO"
    LibHTS::BCF_HL_INFO
  when "FORMAT", "FMT"
    LibHTS::BCF_HL_FMT
  when "CONTIG", "CTG"
    LibHTS::BCF_HL_CTG
  when "STRUCTURED", "STR"
    LibHTS::BCF_HL_STR
  when "GENOTYPE", "GEN"
    LibHTS::BCF_HL_GEN
  else
    # Name the rejected value so the caller can see what was passed.
    raise TypeError, "Invalid header line type: #{bcf_hl_type.inspect}"
  end
end
119
+
43
120
  def initialize_copy(orig)
44
121
  @bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
45
122
  end
@@ -3,9 +3,43 @@
3
3
  module HTS
4
4
  class Bcf < Hts
5
5
  class HeaderRecord
6
- def initialize
6
+ def initialize(arg = nil)
7
+ case arg
8
+ when LibHTS::BcfHrec
9
+ @bcf_hrec = arg
10
+ else
11
+ raise TypeError, "Invalid argument"
12
+ end
13
+ end
14
+
15
+ def struct
7
16
  @bcf_hrec
8
17
  end
18
+
19
# Add a key to the wrapped header record via LibHTS.bcf_hrec_add_key.
#
# @param key [String] the key name
# @return the value returned by LibHTS.bcf_hrec_add_key
def add_key(key)
  # The C API takes the length in bytes; String#length counts characters
  # and would be too short for multibyte keys.
  LibHTS.bcf_hrec_add_key(@bcf_hrec, key, key.bytesize)
end
22
+
23
# Set the value at index +i+ of the wrapped header record via
# LibHTS.bcf_hrec_set_val.
#
# @param i [Integer] index passed through to bcf_hrec_set_val
# @param val [String] the value to store
# @param quote [Boolean] passed to the C API as 1 (true) or 0 (false)
# @return the value returned by LibHTS.bcf_hrec_set_val
def set_value(i, val, quote: true)
  is_quoted = quote ? 1 : 0
  # The C API takes the length in bytes; String#length counts characters
  # and would be too short for multibyte values.
  LibHTS.bcf_hrec_set_val(@bcf_hrec, i, val, val.bytesize, is_quoted)
end
27
+
28
+ def find_key(key)
29
+ LibHTS.bcf_hrec_find_key(@bcf_hrec, key)
30
+ end
31
+
32
+ def to_s
33
+ kstr = LibHTS::KString.new
34
+ LibHTS.bcf_hrec_format(@bcf_hrec, kstr)
35
+ kstr[:s]
36
+ end
37
+
38
+ private
39
+
40
+ def initialize_copy(orig)
41
+ @bcf_hrec = LibHTS.bcf_hrec_dup(orig.struct)
42
+ end
9
43
  end
10
44
  end
11
45
  end
data/lib/hts/bcf/info.rb CHANGED
@@ -9,31 +9,11 @@ module HTS
9
9
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
10
  end
11
11
 
12
- # For compatibility with htslib.cr.
13
- def get_int(key)
14
- get(key, :int)
15
- end
16
-
17
- # For compatibility with htslib.cr.
18
- def get_float(key)
19
- get(key, :float)
20
- end
21
-
22
- # For compatibility with htslib.cr.
23
- def get_string(key)
24
- get(key, :string)
25
- end
26
-
27
- # For compatibility with htslib.cr.
28
- def get_flag(key)
29
- get(key, :flag)
30
- end
31
-
32
- def [](key)
33
- get(key)
34
- end
35
-
36
12
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
13
+ # @note: Why is this method named "get" instead of "fetch"?
14
+ # This is for compatibility with the Crystal language
15
+ # which provides methods like `get_int`, `get_float`, etc.
16
+ # I think they are better than `fetch_int` and `fetch_float`.
37
17
  def get(key, type = nil)
38
18
  n = FFI::MemoryPointer.new(:int)
39
19
  p1 = @p1
@@ -70,6 +50,30 @@ module HTS
70
50
  end
71
51
  end
72
52
 
53
+ # For compatibility with HTS.cr.
54
+ def get_int(key)
55
+ get(key, :int)
56
+ end
57
+
58
+ # For compatibility with HTS.cr.
59
+ def get_float(key)
60
+ get(key, :float)
61
+ end
62
+
63
+ # For compatibility with HTS.cr.
64
+ def get_string(key)
65
+ get(key, :string)
66
+ end
67
+
68
+ # For compatibility with HTS.cr.
69
+ def get_flag(key)
70
+ get(key, :flag)
71
+ end
72
+
73
+ def [](key)
74
+ get(key)
75
+ end
76
+
73
77
  # FIXME: naming? room for improvement.
74
78
  def fields
75
79
  keys.map do |key|
data/lib/hts/bcf.rb CHANGED
@@ -52,10 +52,9 @@ module HTS
52
52
  build_index(index) if build_index
53
53
  @idx = load_index(index)
54
54
  @start_position = tell
55
- super # do nothing
56
55
  end
57
56
 
58
- def build_index(index_name = nil, min_shift: 14)
57
+ def build_index(index_name = nil, min_shift: 14, threads: 2)
59
58
  check_closed
60
59
 
61
60
  if index_name
@@ -63,10 +62,15 @@ module HTS
63
62
  else
64
63
  warn "Create index for #{@file_name}"
65
64
  end
66
- r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
67
- raise "Failed to build index for #{@file_name}" if r < 0
68
-
69
- self
65
+ case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
66
+ when 0 # successful
67
+ when -1 then raise "indexing failed"
68
+ when -2 then raise "opening #{@file_name} failed"
69
+ when -3 then raise "format not indexable"
70
+ when -4 then raise "failed to create and/or save the index"
71
+ else raise "unknown error"
72
+ end
73
+ self # for method chaining
70
74
  end
71
75
 
72
76
  def load_index(index_name = nil)
@@ -85,22 +89,34 @@ module HTS
85
89
  !@idx.null?
86
90
  end
87
91
 
88
- def write_header
92
# Free the index held in @idx (if there is one), clear the reference,
# and then delegate to the superclass implementation of close.
#
# @return the value returned by the superclass close
def close
  # @idx may be nil (it is reset to nil below) or a NULL pointer; in both
  # cases there is nothing to free, so hts_idx_destroy is not called.
  LibHTS.hts_idx_destroy(@idx) if @idx && !@idx.null?
  @idx = nil
  super
end
97
+
98
+ def write_header(header)
89
99
  check_closed
90
100
 
91
101
  @header = header.dup
92
- LibHTS.hts_set_fai_filename(header, @file_name)
93
102
  LibHTS.bcf_hdr_write(@hts_file, header)
94
103
  end
95
104
 
96
- def write(var)
105
+ def header=(header)
106
+ write_header(header)
107
+ end
108
+
109
+ def write(record)
97
110
  check_closed
98
111
 
99
- var_dup = var.dup
100
- LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
112
+ # record = record.dup
113
+ r = LibHTS.bcf_write(@hts_file, header, record)
114
+ raise "Failed to write record" if r < 0
101
115
  end
102
116
 
103
- # Close the current file.
117
+ def <<(var)
118
+ write(var)
119
+ end
104
120
 
105
121
  def nsamples
106
122
  check_closed
@@ -122,29 +138,6 @@ module HTS
122
138
  end
123
139
  end
124
140
 
125
- private def each_record_copy
126
- check_closed
127
-
128
- return to_enum(__method__) unless block_given?
129
-
130
- while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
131
- record = Record.new(bcf1, header)
132
- yield record
133
- end
134
- self
135
- end
136
-
137
- private def each_record_reuse
138
- check_closed
139
-
140
- return to_enum(__method__) unless block_given?
141
-
142
- bcf1 = LibHTS.bcf_init
143
- record = Record.new(bcf1, header)
144
- yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
145
- self
146
- end
147
-
148
141
  def query(...)
149
142
  querys(...) # Fixme
150
143
  end
@@ -166,55 +159,6 @@ module HTS
166
159
  # private def queryi_reuse
167
160
  # end
168
161
 
169
- private def querys_copy(region)
170
- check_closed
171
-
172
- raise "query is only available for BCF files" unless file_format == "bcf"
173
- raise "Index file is required to call the query method." unless index_loaded?
174
- return to_enum(__method__, region) unless block_given?
175
-
176
- qitr = LibHTS.bcf_itr_querys(@idx, header, region)
177
-
178
- begin
179
- loop do
180
- bcf1 = LibHTS.bcf_init
181
- slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
182
- break if slen == -1
183
- raise if slen < -1
184
-
185
- yield Record.new(bcf1, header)
186
- end
187
- ensure
188
- LibHTS.bcf_itr_destroy(qitr)
189
- end
190
- self
191
- end
192
-
193
- private def querys_reuse(region)
194
- check_closed
195
-
196
- raise "query is only available for BCF files" unless file_format == "bcf"
197
- raise "Index file is required to call the query method." unless index_loaded?
198
- return to_enum(__method__, region) unless block_given?
199
-
200
- qitr = LibHTS.bcf_itr_querys(@idx, header, region)
201
-
202
- bcf1 = LibHTS.bcf_init
203
- record = Record.new(bcf1, header)
204
- begin
205
- loop do
206
- slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
207
- break if slen == -1
208
- raise if slen < -1
209
-
210
- yield record
211
- end
212
- ensure
213
- LibHTS.bcf_itr_destroy(qitr)
214
- end
215
- self
216
- end
217
-
218
162
  # @!macro [attach] define_getter
219
163
  # @method $1
220
164
  # Get $1 array
@@ -231,13 +175,13 @@ module HTS
231
175
  def info(key = nil)
232
176
  check_closed
233
177
  position = tell
234
- if key
235
- ary = map { |r| r.info(key) }
236
- else
237
- raise NotImplementedError
238
- # ary = each_copy.map { |r| r.info }
239
- # ary = map { |r| r.info.clone }
240
- end
178
+ raise NotImplementedError unless key
179
+
180
+ ary = map { |r| r.info(key) }
181
+
182
+ # ary = each_copy.map { |r| r.info }
183
+ # ary = map { |r| r.info.clone }
184
+
241
185
  seek(position)
242
186
  ary
243
187
  end
@@ -245,13 +189,13 @@ module HTS
245
189
  def format(key = nil)
246
190
  check_closed
247
191
  position = tell
248
- if key
249
- ary = map { |r| r.format(key) }
250
- else
251
- raise NotImplementedError
252
- # ary = each_copy.map { |r| r.format }
253
- # ary = map { |r| r.format.clone }
254
- end
192
+ raise NotImplementedError unless key
193
+
194
+ ary = map { |r| r.format(key) }
195
+
196
+ # ary = each_copy.map { |r| r.format }
197
+ # ary = map { |r| r.format.clone }
198
+
255
199
  seek(position)
256
200
  ary
257
201
  end
@@ -285,5 +229,81 @@ module HTS
285
229
  yield r.format(key)
286
230
  end
287
231
  end
232
+
233
+ private
234
+
235
+ def querys_reuse(region)
236
+ check_closed
237
+
238
+ raise "query is only available for BCF files" unless file_format == "bcf"
239
+ raise "Index file is required to call the query method." unless index_loaded?
240
+ return to_enum(__method__, region) unless block_given?
241
+
242
+ qiter = LibHTS.bcf_itr_querys(@idx, header, region)
243
+ raise "Failed to query region #{region}" if qiter.null?
244
+
245
+ bcf1 = LibHTS.bcf_init
246
+ record = Record.new(bcf1, header)
247
+ begin
248
+ loop do
249
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
250
+ break if slen == -1
251
+ raise if slen < -1
252
+
253
+ yield record
254
+ end
255
+ ensure
256
+ LibHTS.bcf_itr_destroy(qiter)
257
+ end
258
+ self
259
+ end
260
+
261
+ def querys_copy(region)
262
+ check_closed
263
+
264
+ raise "query is only available for BCF files" unless file_format == "bcf"
265
+ raise "Index file is required to call the query method." unless index_loaded?
266
+ return to_enum(__method__, region) unless block_given?
267
+
268
+ qiter = LibHTS.bcf_itr_querys(@idx, header, region)
269
+ raise "Failed to query region #{region}" if qiter.null?
270
+
271
+ begin
272
+ loop do
273
+ bcf1 = LibHTS.bcf_init
274
+ slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
275
+ break if slen == -1
276
+ raise if slen < -1
277
+
278
+ yield Record.new(bcf1, header)
279
+ end
280
+ ensure
281
+ LibHTS.bcf_itr_destroy(qiter)
282
+ end
283
+ self
284
+ end
285
+
286
+ def each_record_reuse
287
+ check_closed
288
+
289
+ return to_enum(__method__) unless block_given?
290
+
291
+ bcf1 = LibHTS.bcf_init
292
+ record = Record.new(bcf1, header)
293
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
294
+ self
295
+ end
296
+
297
+ def each_record_copy
298
+ check_closed
299
+
300
+ return to_enum(__method__) unless block_given?
301
+
302
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
303
+ record = Record.new(bcf1, header)
304
+ yield record
305
+ end
306
+ self
307
+ end
288
308
  end
289
309
  end
@@ -0,0 +1,64 @@
1
+ require_relative "../faidx"
2
+
3
module HTS
  class Faidx
    # A view of one named sequence of a Faidx object.
    #
    # All data access is delegated to the owning Faidx through its
    # `seq_len`, `seq`, and `qual` methods.
    class Sequence
      attr_reader :name, :faidx

      # @param faidx [Faidx] the index that owns the sequence
      # @param name [String, Symbol] the sequence name
      # @raise [RuntimeError] if `faidx.has_key?(name)` is false
      def initialize(faidx, name)
        raise "Sequence not found: #{name}" unless faidx.has_key?(name)

        @faidx = faidx
        @name = name
      end

      # @return the length reported by `faidx.seq_len` for this sequence
      def length
        faidx.seq_len(name)
      end
      alias size length

      # Fetch bases of this sequence; arguments are passed to `faidx.seq`.
      def seq(start = nil, stop = nil)
        faidx.seq(name, start, stop)
      end

      # Fetch qualities of this sequence; arguments are passed to `faidx.qual`.
      def qual(start = nil, stop = nil)
        faidx.qual(name, start, stop)
      end

      # Fetch part of the sequence by index or by range.
      #
      # A negative Integer, or a negative Range bound, counts back from the
      # end of the sequence (`length` is added to it).
      #
      # @param arg [Integer, Range] a single position or a range of positions
      # @return the result of `seq` for the resolved start and stop
      # @raise [ArgumentError] if arg is neither an Integer nor a Range
      def [](arg)
        case arg
        when Integer
          pos = arg >= 0 ? arg : length + arg
          seq(pos, pos)
        when Range
          start, stop = range_bounds(arg)
          seq(start, stop)
        else
          raise ArgumentError, "Expect an Integer or a Range, got #{arg.class}"
        end
      end

      private

      # Translate a Range into the inclusive [start, stop] pair given to `seq`.
      # Returns [nil, nil] when the range has neither a begin nor an end.
      def range_bounds(range)
        first = range.begin
        last = range.end
        first += length if first && first < 0
        last += length if last && last < 0

        return [nil, nil] if first.nil? && last.nil?

        # `seq` takes an inclusive stop, so an exclusive range ends one earlier.
        last -= 1 if last && range.exclude_end?

        # A missing begin starts at the first base; a missing end runs
        # through the last base.
        [first || 0, last || length - 1]
      end
    end
  end
end
data/lib/hts/faidx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../htslib"
4
+ require_relative "faidx/sequence"
4
5
 
5
6
  module HTS
6
7
  class Faidx
@@ -25,7 +26,11 @@ module HTS
25
26
  end
26
27
 
27
28
  @file_name = file_name
28
- @fai = LibHTS.fai_load(@file_name)
29
+ @fai = if [".fq", ".fastq"].include? File.extname(@file_name)
30
+ LibHTS.fai_load_format(@file_name, 2)
31
+ else
32
+ LibHTS.fai_load(@file_name)
33
+ end
29
34
 
30
35
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
36
  end
@@ -38,10 +43,9 @@ module HTS
38
43
  LibHTS.fai_destroy(@fai)
39
44
  end
40
45
 
41
- # FIXME: This doesn't seem to work as expected
42
- # def closed?
43
- # @fai.null?
44
- # end
46
+ def file_format
47
+ @fai[:format]
48
+ end
45
49
 
46
50
  # the number of sequences in the index.
47
51
  def length
@@ -50,31 +54,48 @@ module HTS
50
54
  alias size length
51
55
 
52
56
  # return the names of the sequences in the index.
53
- def chrom_size(chrom)
57
+ def names
58
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
59
+ end
60
+
61
+ alias keys names
62
+
63
# Check whether the index contains a sequence with the given name,
# as reported by LibHTS.faidx_has_seq.
#
# @param key [String, Symbol] the sequence name
# @return [Boolean] true when faidx_has_seq returns 1, false when it returns 0
# @raise [ArgumentError] if key is neither a String nor a Symbol
# @raise [RuntimeError] if faidx_has_seq returns anything other than 0 or 1
def has_key?(key)
  raise ArgumentError, "Expect key to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)

  key = key.to_s
  case (code = LibHTS.faidx_has_seq(@fai, key))
  when 1 then true
  when 0 then false
  else raise "Unexpected return value from faidx_has_seq: #{code.inspect}"
  end
end
73
+
74
+ def [](name)
75
+ name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
76
+ Sequence.new(self, name)
77
+ end
78
+
79
+ # return the length of the requested chromosome.
80
+ def seq_len(chrom)
54
81
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
55
82
 
56
83
  chrom = chrom.to_s
57
84
  result = LibHTS.faidx_seq_len(@fai, chrom)
58
85
  result == -1 ? nil : result
59
86
  end
60
- alias chrom_length chrom_size
61
87
 
62
- # return the length of the requested chromosome.
63
- def chrom_names
64
- Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
- end
66
-
67
- # @overload fetch(name)
88
+ # @overload seq(name)
68
89
  # Fetch the sequence as a String.
69
90
  # @param name [String] chr1:0-10
70
- # @overload fetch(name, start, stop)
91
+ # @overload seq(name, start, stop)
71
92
  # Fetch the sequence as a String.
72
93
  # @param name [String] the name of the chromosome
73
94
  # @param start [Integer] the start position of the sequence (0-based)
74
95
  # @param stop [Integer] the end position of the sequence (0-based)
75
96
  # @return [String] the sequence
76
97
 
77
- def seq(name, start = nil, stop = nil)
98
+ def fetch_seq(name, start = nil, stop = nil)
78
99
  name = name.to_s
79
100
  rlen = FFI::MemoryPointer.new(:int)
80
101
 
@@ -84,6 +105,7 @@ module HTS
84
105
  start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
106
  stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
107
  start > stop && raise(ArgumentError, "Expect start to be <= stop")
108
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
87
109
 
88
110
  result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
111
  end
@@ -95,5 +117,32 @@ module HTS
95
117
 
96
118
  result
97
119
  end
120
+
121
+ alias seq fetch_seq
122
+
123
# Fetch the quality string for a sequence or part of a sequence.
#
# With no coordinates, +name+ is handed to LibHTS.fai_fetchqual as is.
# With coordinates, the interval is validated and then fetched with
# LibHTS.faidx_fetch_qual.
#
# @param name [String, Symbol] the sequence name (converted with `to_s`)
# @param start [Integer, nil] start position; must be given together with stop
# @param stop [Integer, nil] stop position; must be less than `seq_len(name)`
# @return the value returned by the LibHTS fetch call
# @raise [ArgumentError] if only one of start/stop is given, the interval
#   is invalid, or `seq_len` does not know the name
# @raise [RuntimeError] if the library reports a length of -2 or -1
def fetch_qual(name, start = nil, stop = nil)
  name = name.to_s
  rlen = FFI::MemoryPointer.new(:int)

  if start.nil? && stop.nil?
    result = LibHTS.fai_fetchqual(@fai, name, rlen)
  else
    # Reject a half-specified interval up front instead of failing on nil.
    raise ArgumentError, "Expect both start and stop to be given" if start.nil? || stop.nil?
    raise ArgumentError, "Expect start to be >= 0" if start < 0
    raise ArgumentError, "Expect stop to be >= 0" if stop < 0
    raise ArgumentError, "Expect start to be <= stop" if start > stop

    # seq_len returns nil for a name it does not know; report that
    # explicitly rather than comparing an Integer with nil.
    len = seq_len(name)
    raise ArgumentError, "Invalid chromosome name: #{name}" if len.nil?
    raise ArgumentError, "Expect stop to be < seq_len" if stop >= len

    result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
  end

  # The library reports failures through the length output parameter.
  case rlen.read_int
  when -2 then raise "Invalid chromosome name: #{name}"
  when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
  end

  result
end
145
+
146
+ alias qual fetch_qual
98
147
  end
99
148
  end