htslib 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf/info.rb CHANGED
@@ -6,7 +6,6 @@ module HTS
6
6
  class Info
7
7
  def initialize(record)
8
8
  @record = record
9
- @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
9
  end
11
10
 
12
11
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
@@ -16,37 +15,55 @@ module HTS
16
15
  # I think they are better than `fetch_int` and `fetch_float`.
17
16
  def get(key, type = nil)
18
17
  n = FFI::MemoryPointer.new(:int)
19
- p1 = @p1
18
+ p1 = FFI::MemoryPointer.new(:pointer)
19
+ p1.write_pointer(FFI::Pointer::NULL)
20
20
  h = @record.header.struct
21
21
  r = @record.struct
22
22
 
23
- info_values = proc do |typ|
23
+ info_values = proc do |typ, reader|
24
24
  ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
25
25
  return nil if ret < 0 # return from method.
26
26
 
27
- p1.read_pointer
27
+ dst = p1.read_pointer
28
+ begin
29
+ reader.call(dst, n.read_int)
30
+ ensure
31
+ LibHTS.hts_free(dst) unless dst.null?
32
+ p1.write_pointer(FFI::Pointer::NULL)
33
+ end
34
+ end
35
+
36
+ actual_type = ht_type_to_sym(get_info_type(key))
37
+ if type && actual_type && !info_type_compatible?(actual_type, type.to_sym)
38
+ raise InfoTypeError, "Tag #{key} is not #{type_label(type)} INFO field"
28
39
  end
29
40
 
30
- type ||= ht_type_to_sym(get_info_type(key))
41
+ type ||= actual_type
42
+ return nil if actual_type && !key?(key)
31
43
 
32
44
  case type&.to_sym
33
45
  when :int, :int32
34
- info_values.call(LibHTS::BCF_HT_INT)
35
- .read_array_of_int32(n.read_int)
46
+ info_values.call(LibHTS::BCF_HT_INT, ->(dst, len) { dst.read_array_of_int32(len) })
47
+ when :int64, :long
48
+ info_values.call(LibHTS::BCF_HT_LONG, ->(dst, len) { dst.read_array_of_int64(len) })
36
49
  when :float, :real
37
- info_values.call(LibHTS::BCF_HT_REAL)
38
- .read_array_of_float(n.read_int)
50
+ info_values.call(LibHTS::BCF_HT_REAL, ->(dst, len) { dst.read_array_of_float(len) })
39
51
  when :flag, :bool
40
- case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
41
- when 1 then true
42
- when 0 then false
43
- when -1 then nil
44
- else
45
- raise "Unknown return value from bcf_get_info_flag: #{ret}"
52
+ begin
53
+ case ret = LibHTS.bcf_get_info_flag(h, r, key, p1, n)
54
+ when 1 then true
55
+ when 0 then false
56
+ when -1 then nil
57
+ else
58
+ raise InfoReadError, "Unknown return value from bcf_get_info_flag: #{ret}"
59
+ end
60
+ ensure
61
+ dst = p1.read_pointer
62
+ LibHTS.hts_free(dst) unless dst.null?
63
+ p1.write_pointer(FFI::Pointer::NULL)
46
64
  end
47
65
  when :string, :str
48
- info_values.call(LibHTS::BCF_HT_STR)
49
- .read_string
66
+ info_values.call(LibHTS::BCF_HT_STR, ->(dst, _len) { dst.read_string })
50
67
  end
51
68
  end
52
69
 
@@ -60,6 +77,11 @@ module HTS
60
77
  get(key, :float)
61
78
  end
62
79
 
80
+ # For compatibility with HTS.cr.
81
+ def get_int64(key)
82
+ get(key, :int64)
83
+ end
84
+
63
85
  # For compatibility with HTS.cr.
64
86
  def get_string(key)
65
87
  get(key, :string)
@@ -74,6 +96,194 @@ module HTS
74
96
  get(key)
75
97
  end
76
98
 
99
+ # Set INFO field value with automatic type detection.
100
+ # @param key [String] INFO tag name
101
+ # @param value [Integer, Float, String, Array, true, false, nil] value to set
102
+ # - Integer or Array<Integer> -> update_int
103
+ # - Float or Array<Float,Integer> -> update_float
104
+ # - String -> update_string
105
+ # - true/false -> update_flag
106
+ # - nil -> delete the INFO field
107
+ def []=(key, value)
108
+ case value
109
+ when nil
110
+ delete(key)
111
+ when true, false
112
+ update_flag(key, value)
113
+ when Integer
114
+ unless int32_range?(value)
115
+ raise RangeError,
116
+ "Integer out of int32 range for []=. Current htslib backend does not support int64 INFO update."
117
+ end
118
+
119
+ update_int(key, [value])
120
+ when Float
121
+ update_float(key, [value])
122
+ when String
123
+ update_string(key, value)
124
+ when Array
125
+ if value.empty?
126
+ raise ArgumentError, "Cannot set INFO field to empty array. Use nil to delete."
127
+ elsif value.all? { |v| v.is_a?(Integer) }
128
+ unless value.all? { |v| int32_range?(v) }
129
+ raise RangeError,
130
+ "Integer array contains out-of-int32 values for []=. Current htslib backend does not support int64 INFO update."
131
+ end
132
+
133
+ update_int(key, value)
134
+ elsif value.all? { |v| v.is_a?(Numeric) }
135
+ update_float(key, value)
136
+ else
137
+ raise ArgumentError, "INFO array must contain only integers or floats, got: #{value.map(&:class).uniq}"
138
+ end
139
+ else
140
+ raise ArgumentError, "Unsupported INFO value type: #{value.class}"
141
+ end
142
+ end
143
+
144
+ # Update INFO field with integer value(s).
145
+ # For compatibility with HTS.cr.
146
+ # @param key [String] INFO tag name
147
+ # @param values [Array<Integer>] integer values (use single-element array for scalar)
148
+ def update_int(key, values)
149
+ values = Array(values)
150
+ ptr = FFI::MemoryPointer.new(:int32, values.size)
151
+ ptr.write_array_of_int32(values)
152
+ ret = LibHTS.bcf_update_info(
153
+ @record.header.struct,
154
+ @record.struct,
155
+ key,
156
+ ptr,
157
+ values.size,
158
+ LibHTS::BCF_HT_INT
159
+ )
160
+ raise InfoUpdateError, "Failed to update INFO int field '#{key}': #{ret}" if ret < 0
161
+
162
+ ret
163
+ end
164
+
165
+ # Update INFO field with int64 value(s).
166
+ # @note int64 INFO values are primarily relevant for VCF output.
167
+ # @param key [String] INFO tag name
168
+ # @param values [Array<Integer>] integer values (use single-element array for scalar)
169
+ def update_int64(_key, _values)
170
+ raise UnsupportedInfoOperationError, "htslib backend does not implement int64 INFO update (BCF_HT_LONG)"
171
+ end
172
+
173
+ # Update INFO field with float value(s).
174
+ # For compatibility with HTS.cr.
175
+ # @param key [String] INFO tag name
176
+ # @param values [Array<Float>] float values (use single-element array for scalar)
177
+ def update_float(key, values)
178
+ values = Array(values).map(&:to_f)
179
+ ptr = FFI::MemoryPointer.new(:float, values.size)
180
+ ptr.write_array_of_float(values)
181
+ ret = LibHTS.bcf_update_info(
182
+ @record.header.struct,
183
+ @record.struct,
184
+ key,
185
+ ptr,
186
+ values.size,
187
+ LibHTS::BCF_HT_REAL
188
+ )
189
+ raise InfoUpdateError, "Failed to update INFO float field '#{key}': #{ret}" if ret < 0
190
+
191
+ ret
192
+ end
193
+
194
+ # Update INFO field with string value.
195
+ # For compatibility with HTS.cr.
196
+ # @param key [String] INFO tag name
197
+ # @param value [String] string value
198
+ def update_string(key, value)
199
+ ret = LibHTS.bcf_update_info(
200
+ @record.header.struct,
201
+ @record.struct,
202
+ key,
203
+ value.to_s,
204
+ 1,
205
+ LibHTS::BCF_HT_STR
206
+ )
207
+ raise InfoUpdateError, "Failed to update INFO string field '#{key}': #{ret}" if ret < 0
208
+
209
+ ret
210
+ end
211
+
212
+ # Update INFO flag field.
213
+ # For compatibility with HTS.cr.
214
+ # @param key [String] INFO tag name
215
+ # @param present [Boolean] true to set flag, false to remove it
216
+ def update_flag(key, present = true)
217
+ ret = if present
218
+ LibHTS.bcf_update_info(
219
+ @record.header.struct,
220
+ @record.struct,
221
+ key,
222
+ FFI::Pointer::NULL,
223
+ 1,
224
+ LibHTS::BCF_HT_FLAG
225
+ )
226
+ else
227
+ # Remove flag by setting n=0
228
+ LibHTS.bcf_update_info(
229
+ @record.header.struct,
230
+ @record.struct,
231
+ key,
232
+ FFI::Pointer::NULL,
233
+ 0,
234
+ LibHTS::BCF_HT_FLAG
235
+ )
236
+ end
237
+ raise InfoUpdateError, "Failed to update INFO flag field '#{key}': #{ret}" if ret < 0
238
+
239
+ ret
240
+ end
241
+
242
+ # Delete an INFO field.
243
+ # @param key [String] INFO tag name
244
+ # @return [Boolean] true if field was deleted, false if it didn't exist
245
+ def delete(key)
246
+ type = get_info_type(key)
247
+ return false if type.nil?
248
+ return false unless key?(key)
249
+
250
+ # Delete by setting n=0
251
+ ret = LibHTS.bcf_update_info(
252
+ @record.header.struct,
253
+ @record.struct,
254
+ key,
255
+ FFI::Pointer::NULL,
256
+ 0,
257
+ type
258
+ )
259
+ return false if ret < 0
260
+
261
+ true
262
+ end
263
+
264
+ # Check if an INFO field exists.
265
+ # @param key [String] INFO tag name
266
+ # @return [Boolean] true if the field exists
267
+ def key?(key)
268
+ type = header_info_type(key)
269
+ return false if type.nil?
270
+
271
+ ndst = FFI::MemoryPointer.new(:int)
272
+ ndst.write_int(0)
273
+ dst_ptr = FFI::MemoryPointer.new(:pointer)
274
+ dst_ptr.write_pointer(FFI::Pointer::NULL)
275
+
276
+ ret = LibHTS.bcf_get_info_values(@record.header.struct, @record.struct, key, dst_ptr, ndst, type)
277
+ type == LibHTS::BCF_HT_FLAG ? ret == 1 : ret >= 0
278
+ ensure
279
+ if dst_ptr
280
+ dst = dst_ptr.read_pointer
281
+ LibHTS.hts_free(dst) unless dst.null?
282
+ end
283
+ end
284
+
285
+ alias include? key?
286
+
77
287
  # FIXME: naming? room for improvement.
78
288
  def fields
79
289
  keys.map do |key|
@@ -119,16 +329,15 @@ module HTS
119
329
  end
120
330
 
121
331
  def get_info_type(key)
122
- @record.struct[:n_info].times do |i|
123
- info = LibHTS::BcfInfo.new(@record.struct[:d][:info] + i * LibHTS::BcfInfo.size)
124
- k = info[:key]
125
- id = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, k)
126
- if id == key
127
- type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, k)
128
- return type
129
- end
130
- end
131
- nil
332
+ header_info_type(key)
333
+ end
334
+
335
+ def header_info_type(key)
336
+ id = LibHTS.bcf_hdr_id2int(@record.header.struct, LibHTS::BCF_DT_ID, key)
337
+ return nil if id.negative?
338
+ return nil unless LibHTS.bcf_hdr_idinfo_exists(@record.header.struct, LibHTS::BCF_HL_INFO, id)
339
+
340
+ LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, id)
132
341
  end
133
342
 
134
343
  def ht_type_to_sym(t)
@@ -137,7 +346,39 @@ module HTS
137
346
  when LibHTS::BCF_HT_INT then :int
138
347
  when LibHTS::BCF_HT_REAL then :float
139
348
  when LibHTS::BCF_HT_STR then :string
140
- when LibHTS::BCF_HT_LONG then :float
349
+ when LibHTS::BCF_HT_LONG then :int64
350
+ end
351
+ end
352
+
353
+ def int32_range?(value)
354
+ value >= -2_147_483_648 && value <= 2_147_483_647
355
+ end
356
+
357
+ def info_type_compatible?(actual_type, requested_type)
358
+ case requested_type
359
+ when :int, :int32
360
+ actual_type == :int
361
+ when :int64, :long
362
+ %i[int int64].include?(actual_type)
363
+ when :float, :real
364
+ actual_type == :float
365
+ when :flag, :bool
366
+ actual_type == :flag
367
+ when :string, :str
368
+ actual_type == :string
369
+ else
370
+ actual_type == requested_type
371
+ end
372
+ end
373
+
374
+ def type_label(type)
375
+ case type.to_sym
376
+ when :int, :int32 then "integer"
377
+ when :int64, :long then "integer"
378
+ when :float, :real then "float"
379
+ when :flag, :bool then "flag"
380
+ when :string, :str then "string"
381
+ else type.to_s
141
382
  end
142
383
  end
143
384
  end
@@ -54,11 +54,11 @@ module HTS
54
54
  end
55
55
 
56
56
  def id=(id)
57
- LibHTS.bcf_update_id(@header, @bcf1, id)
57
+ LibHTS.bcf_update_id(@header.struct, @bcf1, id)
58
58
  end
59
59
 
60
60
  def clear_id
61
- LibHTS.bcf_update_id(@header, @bcf1, ".")
61
+ LibHTS.bcf_update_id(@header.struct, @bcf1, ".")
62
62
  end
63
63
 
64
64
  def ref
@@ -100,7 +100,7 @@ module HTS
100
100
  when 1
101
101
  id = d[:flt].read_int
102
102
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, id)
103
- when 2..nil
103
+ when 2..
104
104
  d[:flt].get_array_of_int(0, n_flt).map do |i|
105
105
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
106
106
  end
@@ -130,9 +130,13 @@ module HTS
130
130
 
131
131
  def to_s
132
132
  ksr = LibHTS::KString.new
133
- raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
133
+ begin
134
+ raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
134
135
 
135
- ksr[:s]
136
+ ksr.read_string_copy
137
+ ensure
138
+ ksr.free_buffer
139
+ end
136
140
  end
137
141
 
138
142
  private