htslib 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf/info.rb CHANGED
@@ -74,6 +74,164 @@ module HTS
74
74
  get(key)
75
75
  end
76
76
 
77
+ # Set INFO field value with automatic type detection.
78
+ # @param key [String] INFO tag name
79
+ # @param value [Integer, Float, String, Array, true, false, nil] value to set
80
+ # - Integer or Array<Integer> -> update_int
81
+ # - Float or Array<Float,Integer> -> update_float
82
+ # - String -> update_string
83
+ # - true/false -> update_flag
84
+ # - nil -> delete the INFO field
85
+ def []=(key, value)
86
+ case value
87
+ when nil
88
+ delete(key)
89
+ when true, false
90
+ update_flag(key, value)
91
+ when Integer
92
+ update_int(key, [value])
93
+ when Float
94
+ update_float(key, [value])
95
+ when String
96
+ update_string(key, value)
97
+ when Array
98
+ if value.empty?
99
+ raise ArgumentError, "Cannot set INFO field to empty array. Use nil to delete."
100
+ elsif value.all? { |v| v.is_a?(Integer) }
101
+ update_int(key, value)
102
+ elsif value.all? { |v| v.is_a?(Numeric) }
103
+ update_float(key, value)
104
+ else
105
+ raise ArgumentError, "INFO array must contain only integers or floats, got: #{value.map(&:class).uniq}"
106
+ end
107
+ else
108
+ raise ArgumentError, "Unsupported INFO value type: #{value.class}"
109
+ end
110
+ end
111
+
112
+ # Update INFO field with integer value(s).
113
+ # For compatibility with HTS.cr.
114
+ # @param key [String] INFO tag name
115
+ # @param values [Array<Integer>, Integer] integer values (a scalar is also accepted and is wrapped with Array())
116
+ def update_int(key, values)
117
+ values = Array(values)
118
+ ptr = FFI::MemoryPointer.new(:int32, values.size)
119
+ ptr.write_array_of_int32(values)
120
+ ret = LibHTS.bcf_update_info(
121
+ @record.header.struct,
122
+ @record.struct,
123
+ key,
124
+ ptr,
125
+ values.size,
126
+ LibHTS::BCF_HT_INT
127
+ )
128
+ raise "Failed to update INFO int field '#{key}': #{ret}" if ret < 0
129
+
130
+ ret
131
+ end
132
+
133
+ # Update INFO field with float value(s).
134
+ # For compatibility with HTS.cr.
135
+ # @param key [String] INFO tag name
136
+ # @param values [Array<Numeric>, Numeric] numeric values (a scalar is wrapped with Array(); every element is converted with to_f)
137
+ def update_float(key, values)
138
+ values = Array(values).map(&:to_f)
139
+ ptr = FFI::MemoryPointer.new(:float, values.size)
140
+ ptr.write_array_of_float(values)
141
+ ret = LibHTS.bcf_update_info(
142
+ @record.header.struct,
143
+ @record.struct,
144
+ key,
145
+ ptr,
146
+ values.size,
147
+ LibHTS::BCF_HT_REAL
148
+ )
149
+ raise "Failed to update INFO float field '#{key}': #{ret}" if ret < 0
150
+
151
+ ret
152
+ end
153
+
154
+ # Update INFO field with string value.
155
+ # For compatibility with HTS.cr.
156
+ # @param key [String] INFO tag name
157
+ # @param value [String] string value
158
+ def update_string(key, value)
159
+ ret = LibHTS.bcf_update_info(
160
+ @record.header.struct,
161
+ @record.struct,
162
+ key,
163
+ value.to_s,
164
+ 1,
165
+ LibHTS::BCF_HT_STR
166
+ )
167
+ raise "Failed to update INFO string field '#{key}': #{ret}" if ret < 0
168
+
169
+ ret
170
+ end
171
+
172
+ # Update INFO flag field.
173
+ # For compatibility with HTS.cr.
174
+ # @param key [String] INFO tag name
175
+ # @param present [Boolean] true to set flag, false to remove it
176
+ def update_flag(key, present = true)
177
+ ret = if present
178
+ LibHTS.bcf_update_info(
179
+ @record.header.struct,
180
+ @record.struct,
181
+ key,
182
+ FFI::Pointer::NULL,
183
+ 1,
184
+ LibHTS::BCF_HT_FLAG
185
+ )
186
+ else
187
+ # Remove flag by setting n=0
188
+ LibHTS.bcf_update_info(
189
+ @record.header.struct,
190
+ @record.struct,
191
+ key,
192
+ FFI::Pointer::NULL,
193
+ 0,
194
+ LibHTS::BCF_HT_FLAG
195
+ )
196
+ end
197
+ raise "Failed to update INFO flag field '#{key}': #{ret}" if ret < 0
198
+
199
+ ret
200
+ end
201
+
202
+ # Delete an INFO field.
203
+ # @param key [String] INFO tag name
204
+ # @return [Boolean] true if the field was deleted; false if get_info_type returns nil for the key or the underlying update returns a negative code
205
+ def delete(key)
206
+ # Try to get current type to check existence
207
+ type = get_info_type(key)
208
+ return false if type.nil?
209
+
210
+ # Delete by setting n=0
211
+ ret = LibHTS.bcf_update_info(
212
+ @record.header.struct,
213
+ @record.struct,
214
+ key,
215
+ FFI::Pointer::NULL,
216
+ 0,
217
+ type
218
+ )
219
+ return false if ret < 0
220
+
221
+ true
222
+ end
223
+
224
+ # Check if an INFO field exists.
225
+ # @param key [String] INFO tag name
226
+ # @return [Boolean] true if the field exists
227
+ def key?(key)
228
+ # Use get() to check if value is actually present
229
+ # (get_info_type only checks header, not actual value)
230
+ !get(key).nil?
231
+ end
232
+
233
+ alias include? key?
234
+
77
235
  # FIXME: naming? room for improvement.
78
236
  def fields
79
237
  keys.map do |key|
data/lib/hts/bcf.rb CHANGED
@@ -215,13 +215,20 @@ module HTS
215
215
  raise "query is only available for BCF files" unless file_format == "bcf"
216
216
  raise "Index file is required to call the query method." unless index_loaded?
217
217
 
218
- if beg && end_
219
- tid = header.name2id(region)
220
- queryi(tid, beg, end_, copy:, &block)
221
- elsif beg.nil? && end_.nil?
222
- querys(region, copy:, &block)
218
+ case region
219
+ when Array
220
+ raise ArgumentError, "beg and end must not be specified when region is an Array" unless beg.nil? && end_.nil?
221
+
222
+ query_regions(region, copy:, &block)
223
223
  else
224
- raise ArgumentError, "beg and end must be specified together"
224
+ if beg && end_
225
+ tid = header.name2id(region)
226
+ queryi(tid, beg, end_, copy:, &block)
227
+ elsif beg.nil? && end_.nil?
228
+ querys(region, copy:, &block)
229
+ else
230
+ raise ArgumentError, "beg and end must be specified together"
231
+ end
225
232
  end
226
233
  end
227
234
 
@@ -243,6 +250,14 @@ module HTS
243
250
  end
244
251
  end
245
252
 
253
+ def query_regions(regions, copy: false, &block)
254
+ if copy
255
+ query_regions_copy(regions, &block)
256
+ else
257
+ query_regions_reuse(regions, &block)
258
+ end
259
+ end
260
+
246
261
  def queryi_reuse(tid, beg, end_, &block)
247
262
  return to_enum(__method__, tid, beg, end_) unless block_given?
248
263
 
@@ -263,6 +278,15 @@ module HTS
263
278
  self
264
279
  end
265
280
 
281
+ def query_regions_reuse(regions, &block)
282
+ return to_enum(__method__, regions) unless block_given?
283
+
284
+ regions.each do |region|
285
+ querys_reuse(region, &block)
286
+ end
287
+ self
288
+ end
289
+
266
290
  def query_reuse_yield(qiter)
267
291
  bcf1 = LibHTS.bcf_init
268
292
  record = Record.new(header, bcf1)
@@ -299,6 +323,15 @@ module HTS
299
323
  self
300
324
  end
301
325
 
326
+ def query_regions_copy(regions, &block)
327
+ return to_enum(__method__, regions) unless block_given?
328
+
329
+ regions.each do |region|
330
+ querys_copy(region, &block)
331
+ end
332
+ self
333
+ end
334
+
302
335
  def query_copy_yield(qiter)
303
336
  loop do
304
337
  bcf1 = LibHTS.bcf_init
@@ -4,7 +4,7 @@ module HTS
4
4
  attr_reader :name, :faidx
5
5
 
6
6
  def initialize(faidx, name)
7
- raise unless faidx.has_key?(name)
7
+ raise ArgumentError, "Sequence not found: #{name}" unless faidx.has_key?(name)
8
8
 
9
9
  @faidx = faidx
10
10
  @name = name
data/lib/hts/faidx.rb CHANGED
@@ -5,6 +5,8 @@ require_relative "faidx/sequence"
5
5
 
6
6
  module HTS
7
7
  class Faidx
8
+ include Enumerable
9
+
8
10
  attr_reader :file_name
9
11
 
10
12
  def self.open(*args, **kw)
@@ -20,12 +22,9 @@ module HTS
20
22
  end
21
23
 
22
24
  def initialize(file_name)
23
- if block_given?
24
- message = "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead"
25
- raise message
26
- end
25
+ raise ArgumentError, "HTS::Faidx.new() does not take block; Please use HTS::Faidx.open() instead" if block_given?
27
26
 
28
- @file_name = file_name
27
+ @file_name = file_name.freeze
29
28
  @fai = case File.extname(@file_name)
30
29
  when ".fq", ".fastq"
31
30
  LibHTS.fai_load_format(@file_name, 2)
@@ -52,75 +51,102 @@ module HTS
52
51
  end
53
52
 
54
53
  def file_format
54
+ check_closed
55
55
  @fai[:format]
56
56
  end
57
57
 
58
+ # Iterate over each sequence in the index.
59
+ # @yield [Sequence] each sequence object
60
+ # @return [Enumerator] if no block given
61
+ def each
62
+ return to_enum(__method__) unless block_given?
63
+
64
+ check_closed
65
+ names.each { |name| yield self[name] }
66
+ end
67
+
58
68
  # the number of sequences in the index.
69
+ # @return [Integer] the number of sequences
59
70
  def length
71
+ check_closed
60
72
  LibHTS.faidx_nseq(@fai)
61
73
  end
62
74
  alias size length
63
75
 
64
- # return the length of the requested chromosome.
76
+ # Return the list of sequence names in the index.
77
+ # @return [Array<String>] sequence names
65
78
  def names
79
+ check_closed
66
80
  Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
67
81
  end
68
82
 
69
83
  alias keys names
70
84
 
85
+ # Check if a sequence exists in the index.
86
+ # @param key [String, Symbol] sequence name
87
+ # @return [Boolean] true if the sequence exists
71
88
  def has_key?(key)
89
+ check_closed
72
90
  raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
73
91
 
74
92
  key = key.to_s
75
93
  case LibHTS.faidx_has_seq(@fai, key)
76
94
  when 1 then true
77
95
  when 0 then false
78
- else raise
96
+ else raise HTS::Error, "Unexpected return value from faidx_has_seq"
79
97
  end
80
98
  end
81
99
 
100
+ # Get a Sequence object by name or index.
101
+ # @param name [String, Symbol, Integer] sequence name or index
102
+ # @return [Sequence] the sequence object
103
+ # @raise [ArgumentError] if the sequence does not exist
82
104
  def [](name)
105
+ check_closed
83
106
  name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
84
107
  Sequence.new(self, name)
85
108
  end
86
109
 
87
- # return the length of the requested chromosome.
110
+ # Return the length of the requested chromosome.
111
+ # @param chrom [String, Symbol] chromosome name
112
+ # @return [Integer] sequence length
113
+ # @raise [ArgumentError] if the sequence does not exist
88
114
  def seq_len(chrom)
115
+ check_closed
89
116
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
90
117
 
91
118
  chrom = chrom.to_s
92
119
  result = LibHTS.faidx_seq_len(@fai, chrom)
93
- result == -1 ? nil : result
120
+ raise ArgumentError, "Sequence not found: #{chrom}" if result == -1
121
+
122
+ result
94
123
  end
95
124
 
96
- # @overload seq(name)
125
+ # @overload fetch_seq(name)
97
126
  # Fetch the sequence as a String.
98
- # @param name [String] chr1:0-10
99
- # @overload seq(name, start, stop)
127
+ # @param name [String, Symbol] chr1:0-10
128
+ # @return [String] the sequence
129
+ # @overload fetch_seq(name, start, stop)
100
130
  # Fetch the sequence as a String.
101
- # @param name [String] the name of the chromosome
131
+ # @param name [String, Symbol] the name of the chromosome
102
132
  # @param start [Integer] the start position of the sequence (0-based)
103
133
  # @param stop [Integer] the end position of the sequence (0-based)
104
134
  # @return [String] the sequence
105
-
106
135
  def fetch_seq(name, start = nil, stop = nil)
136
+ check_closed
107
137
  name = name.to_s
108
138
  rlen = FFI::MemoryPointer.new(:int)
109
139
 
110
140
  if start.nil? && stop.nil?
111
141
  result = LibHTS.fai_fetch64(@fai, name, rlen)
112
142
  else
113
- start < 0 && raise(ArgumentError, "Expect start to be >= 0")
114
- stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
115
- start > stop && raise(ArgumentError, "Expect start to be <= stop")
116
- stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
117
-
143
+ validate_range!(name, start, stop)
118
144
  result = LibHTS.faidx_fetch_seq64(@fai, name, start, stop, rlen)
119
145
  end
120
146
 
121
147
  case rlen.read_int
122
- when -2 then raise "Invalid chromosome name: #{name}"
123
- when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
148
+ when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
149
+ when -1 then raise HTS::Error, "Error fetching sequence: #{name}:#{start}-#{stop}"
124
150
  end
125
151
 
126
152
  result
@@ -128,29 +154,57 @@ module HTS
128
154
 
129
155
  alias seq fetch_seq
130
156
 
157
+ # @overload fetch_qual(name)
158
+ # Fetch the quality string.
159
+ # @param name [String, Symbol] sequence name
160
+ # @return [String] the quality string
161
+ # @overload fetch_qual(name, start, stop)
162
+ # Fetch the quality string.
163
+ # @param name [String, Symbol] the name of the chromosome
164
+ # @param start [Integer] the start position of the sequence (0-based)
165
+ # @param stop [Integer] the end position of the sequence (0-based)
166
+ # @return [String] the quality string
131
167
  def fetch_qual(name, start = nil, stop = nil)
168
+ check_closed
132
169
  name = name.to_s
133
170
  rlen = FFI::MemoryPointer.new(:int)
134
171
 
135
172
  if start.nil? && stop.nil?
136
173
  result = LibHTS.fai_fetchqual64(@fai, name, rlen)
137
174
  else
138
- start < 0 && raise(ArgumentError, "Expect start to be >= 0")
139
- stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
140
- start > stop && raise(ArgumentError, "Expect start to be <= stop")
141
- stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
142
-
175
+ validate_range!(name, start, stop)
143
176
  result = LibHTS.faidx_fetch_qual64(@fai, name, start, stop, rlen)
144
177
  end
145
178
 
146
179
  case rlen.read_int
147
- when -2 then raise "Invalid chromosome name: #{name}"
148
- when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
180
+ when -2 then raise ArgumentError, "Invalid chromosome name: #{name}"
181
+ when -1 then raise HTS::Error, "Error fetching quality: #{name}:#{start}-#{stop}"
149
182
  end
150
183
 
151
184
  result
152
185
  end
153
186
 
154
187
  alias qual fetch_qual
188
+
189
+ private
190
+
191
+ def check_closed
192
+ raise IOError, "closed Faidx" if closed?
193
+ end
194
+
195
+ # Validate range parameters.
196
+ # @param name [String] sequence name
197
+ # @param start [Integer] start position (0-based)
198
+ # @param stop [Integer] stop position (0-based)
199
+ # @raise [ArgumentError] if range is invalid
200
+ def validate_range!(name, start, stop)
201
+ raise ArgumentError, "Expect start to be >= 0" if start < 0
202
+ raise ArgumentError, "Expect stop to be >= 0" if stop < 0
203
+ raise ArgumentError, "Expect start to be <= stop" if start > stop
204
+
205
+ len = seq_len(name)
206
+ raise ArgumentError, "Sequence not found: #{name}" if len.nil?
207
+ raise ArgumentError, "Expect stop to be < seq_len (#{len})" if stop >= len
208
+ end
155
209
  end
156
210
  end
data/lib/hts/hts.rb CHANGED
@@ -13,7 +13,7 @@ module HTS
13
13
  check_closed
14
14
  position = tell
15
15
  ary = map(&name)
16
- seek(position)
16
+ seek(position) if position
17
17
  ary
18
18
  end
19
19
  end
@@ -4,7 +4,6 @@ module HTS
4
4
  # Module for working with C HTSlib.
5
5
  module LibHTS
6
6
  typedef :int64, :hts_pos_t
7
- typedef :pointer, :bam_plp_auto_f
8
7
 
9
8
  # kstring
10
9
 
@@ -352,6 +351,36 @@ module HTS
352
351
  end
353
352
  end
354
353
 
354
+ # Internal: Non-owning view of bam1_t used when the pointer is managed by HTSlib
355
+ # (e.g., pileup/mpileup). This struct mirrors the layout of bam1_t and MUST NOT
356
+ # free memory on GC. Do not expose publicly; use only for read-only access.
357
+ class Bam1View < FFI::Struct
358
+ layout \
359
+ :core, Bam1Core,
360
+ :id, :uint64,
361
+ :data, :pointer, # uint8_t
362
+ :l_data, :int,
363
+ :m_data, :uint32,
364
+ :_mempolicy, :uint32 # bit_fields
365
+ end
366
+
367
+ # Base modification structure
368
+ class HtsBaseMod < FFI::Struct
369
+ layout \
370
+ :modified_base, :int,
371
+ :canonical_base, :int,
372
+ :strand, :int,
373
+ :qual, :int
374
+ end
375
+
376
+ # Base modification state (opaque pointer)
377
+ # Use AutoPointer since the structure is opaque and we only need custom release.
378
+ class HtsBaseModState < FFI::AutoPointer
379
+ def self.release(ptr)
380
+ LibHTS.hts_base_mod_state_free(ptr) unless ptr.null?
381
+ end
382
+ end
383
+
355
384
  typedef :pointer, :bam_plp
356
385
  typedef :pointer, :bam_mplp
357
386
 
@@ -364,7 +393,7 @@ module HTS
364
393
 
365
394
  class BamPileup1 < FFI::BitStruct
366
395
  layout \
367
- :b, Bam1.ptr,
396
+ :b, :pointer,
368
397
  :qpos, :int32,
369
398
  :indel, :int,
370
399
  :level, :int,
@@ -410,7 +439,13 @@ module HTS
410
439
 
411
440
  FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
412
441
 
413
- class Faidx < FFI::Struct # FIXME: ManagedStruct
442
+ # Faidx represents a faidx_t handle which is treated as a
443
+ # file-level RAII object in HTS::Faidx. It is intentionally
444
+ # kept as a plain Struct and is destroyed explicitly via
445
+ # LibHTS.fai_destroy in HTS::Faidx#close. Do not convert this
446
+ # to ManagedStruct; that would interfere with the explicit
447
+ # lifetime managed by the Ruby wrapper.
448
+ class Faidx < FFI::Struct
414
449
  layout :bgzf, BGZF.ptr,
415
450
  :n, :int,
416
451
  :m, :int,