htslib 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea68a4c331c9a404cfce2bf86fea386985e96ee3b76ae25e9d9b701593294880
4
- data.tar.gz: d826e59f66e20bc40bc47c2033295abe2b3aceab8cb9d5af3d4d41309e448732
3
+ metadata.gz: 9ed0d57a77d113e37ce3a9c8bf75ad8e35640a82eb51d7af09f39409c26a04ae
4
+ data.tar.gz: a5a4092321c2245fd4416ffe2b6b50014b5a6af1a40f20a4ca55a1296b96a2ff
5
5
  SHA512:
6
- metadata.gz: 646dca4eb44c96a67020f57a090c26715b003521c9c6afffad1becf031576c334ea03c99b61f795a35932935535c53a1899a960ab986480cb3f5eef5b9913b96
7
- data.tar.gz: e1cc5d9357932e04cebae1aaa5a8dc7024e0755584ea21999b18004dba76726c10b23276ee6d98fc1580ffb5aad45aeb12fd3e1bf94cfc45fdee70aefda87f91
6
+ metadata.gz: abd2f5234927ee1ba553c40194865ccfca47e24338c0859b46c370f248d9f7e602068500d6a49bd80b8cb4a12ef3f3e78efb49ff3d30b628bc1fea519d3d3fd6
7
+ data.tar.gz: af733241e107742ddc702619ac8ab226daa72bdc515c9f591bb3317be1d3f6556070df8afa664066aca6b38466a84cf9c45c4b76e32c68d607ada759818f7f1c
data/README.md CHANGED
@@ -165,8 +165,6 @@ Try Crystal. [HTS.cr](https://github.com/bio-cr/hts.cr) is implemented in Crysta
165
165
 
166
166
  ## Development
167
167
 
168
- ![Diagram](diagram.svg)
169
-
170
168
  #### Compile from source code
171
169
 
172
170
  [GNU Autotools](https://en.wikipedia.org/wiki/GNU_Autotools) is required to compile htslib.
data/TUTORIAL.md CHANGED
@@ -254,6 +254,51 @@ in.close
254
254
  out.close
255
255
  ```
256
256
 
257
+ Writing and modifying auxiliary tags
258
+
259
+ ```ruby
260
+ # Reading auxiliary tags
261
+ bam = HTS::Bam.open("input.bam")
262
+ record = bam.first
263
+ aux = record.aux
264
+
265
+ # Read tags
266
+ alignment_score = aux["AS"] # Auto-detect type
267
+ mc_cigar = aux.get_string("MC") # Type-specific getter
268
+ edit_distance = aux.get_int("NM") # Type-specific getter
269
+
270
+ # Writing/updating auxiliary tags
271
+ in_bam = HTS::Bam.open("input.bam")
272
+ out_bam = HTS::Bam.open("output.bam", "wb")
273
+ out_bam.write_header(in_bam.header)
274
+
275
+ in_bam.each do |record|
276
+ aux = record.aux
277
+
278
+ # Update or add tags using type-specific methods
279
+ aux.update_int("AS", 100) # Integer tag
280
+ aux.update_float("ZQ", 0.95) # Float tag
281
+ aux.update_string("RG", "sample1") # String tag
282
+ aux.update_array("BC", [25, 30, 28, 32]) # Array tag
283
+
284
+ # Or use the []= operator (auto-detects type)
285
+ aux["NM"] = 2 # Integer
286
+ aux["ZS"] = "modified" # String
287
+ aux["ZF"] = 3.14 # Float
288
+ aux["ZA"] = [1, 2, 3, 4] # Array
289
+
290
+ # Check if tag exists
291
+ if aux.key?("XS")
292
+ aux.delete("XS") # Delete tag
293
+ end
294
+
295
+ out_bam.write(record)
296
+ end
297
+
298
+ in_bam.close
299
+ out_bam.close
300
+ ```
301
+
257
302
  Create index
258
303
 
259
304
  ```ruby
data/lib/hts/bam/auxi.rb CHANGED
@@ -55,6 +55,124 @@ module HTS
55
55
  get(key)
56
56
  end
57
57
 
58
# Assign an auxiliary tag, choosing the updater from the Ruby class of +value+.
# For compatibility with HTS.cr.
#
# Integer -> update_int, Float -> update_float, String -> update_string,
# Array -> update_array (element type auto-detected there).
#
# @param key [String] tag name (2 characters)
# @param value [Integer, Float, String, Array] tag value
# @raise [ArgumentError] if +value+ is none of the supported classes
def []=(key, value)
  return update_int(key, value) if value.is_a?(Integer)
  return update_float(key, value) if value.is_a?(Float)
  return update_string(key, value) if value.is_a?(String)
  return update_array(key, value) if value.is_a?(Array)

  raise ArgumentError, "Unsupported type: #{value.class}"
end
76
+
77
# Write an integer auxiliary tag, creating it or replacing the current value.
# For compatibility with HTS.cr.
#
# The value is coerced with #to_i before being handed to htslib.
#
# @param key [String] tag name (2 characters)
# @param value [Integer] integer value
# @return [Integer] the +value+ that was passed in (not the coerced copy)
# @raise [RuntimeError] if htslib returns a negative status
def update_int(key, value)
  status = LibHTS.bam_aux_update_int(@record.struct, key, value.to_i)
  return value unless status.negative?

  raise "Failed to update integer tag '#{key}': errno #{FFI.errno}"
end
87
+
88
# Write a floating-point auxiliary tag, creating it or replacing the current value.
# For compatibility with HTS.cr.
#
# The value is coerced with #to_f before being handed to htslib.
#
# @param key [String] tag name (2 characters)
# @param value [Float] floating-point value
# @return [Float] the +value+ that was passed in (not the coerced copy)
# @raise [RuntimeError] if htslib returns a negative status
def update_float(key, value)
  status = LibHTS.bam_aux_update_float(@record.struct, key, value.to_f)
  return value unless status.negative?

  raise "Failed to update float tag '#{key}': errno #{FFI.errno}"
end
98
+
99
# Write a string auxiliary tag, creating it or replacing the current value.
# For compatibility with HTS.cr.
#
# The value is coerced with #to_s before being handed to htslib.
#
# @param key [String] tag name (2 characters)
# @param value [String] string value
# @return [String] the +value+ that was passed in (not the coerced copy)
# @raise [RuntimeError] if htslib returns a negative status
def update_string(key, value)
  # NOTE(review): the -1 length presumably asks htslib to measure the
  # NUL-terminated string itself - confirm against bam_aux_update_str docs.
  status = LibHTS.bam_aux_update_str(@record.struct, key, -1, value.to_s)
  return value unless status.negative?

  raise "Failed to update string tag '#{key}': errno #{FFI.errno}"
end
109
+
110
# Update or add a numeric array tag.
# For compatibility with HTS.cr.
#
# The element buffer is allocated and written with the C type that matches
# +type+. htslib interprets the buffer according to the type code it is
# given, so a buffer of 32-bit integers must only be used for 'i'/'I'.
#
# @param key [String] tag name (2 characters)
# @param value [Array<Integer, Float>] non-empty array of integers or floats
# @param type [String, Symbol, nil] element type: 'c' (int8), 'C' (uint8),
#   's' (int16), 'S' (uint16), 'i' (int32), 'I' (uint32) or 'f' (float).
#   When nil, 'i' is used for all-Integer arrays and 'f' for arrays that mix
#   Integer and Float elements.
# @return [Array] the +value+ that was passed in
# @raise [ArgumentError] if the array is empty, contains non-numeric elements,
#   or +type+ is not one of the supported codes
# @raise [RuntimeError] if htslib returns a negative status
def update_array(key, value, type: nil)
  raise ArgumentError, "Array cannot be empty" if value.empty?

  # Auto-detect the element type if not specified
  if type.nil?
    type =
      if value.all? { |v| v.is_a?(Integer) }
        # Signed 32-bit integers by default
        "i"
      elsif value.all? { |v| v.is_a?(Float) || v.is_a?(Integer) }
        "f"
      else
        raise ArgumentError, "Array must contain only integers or floats"
      end
  end

  type = type.to_s
  # Type code => FFI element type used for both allocation and writing.
  element_types = {
    "c" => :int8, "C" => :uint8,
    "s" => :int16, "S" => :uint16,
    "i" => :int32, "I" => :uint32,
    "f" => :float
  }
  ffi_type = element_types[type]
  raise ArgumentError, "Invalid array type: #{type}" if ffi_type.nil?

  elements = ffi_type == :float ? value.map(&:to_f) : value.map(&:to_i)
  ptr = FFI::MemoryPointer.new(ffi_type, value.size)
  ptr.public_send("write_array_of_#{ffi_type}", elements)

  ret = LibHTS.bam_aux_update_array(@record.struct, key, type.ord, value.size, ptr)
  raise "Failed to update array tag '#{key}': errno #{FFI.errno}" if ret < 0

  value
end
150
+
151
# Remove an auxiliary tag from the record.
# For compatibility with HTS.cr.
#
# The tag is looked up first; a missing tag is reported as +false+ rather
# than as an error.
#
# @param key [String] tag name (2 characters)
# @return [Boolean] true if tag was deleted, false if tag was not found
# @raise [RuntimeError] if htslib returns a negative status while deleting
def delete(key)
  tag_ptr = LibHTS.bam_aux_get(@record.struct, key)

  if tag_ptr.null?
    false
  elsif LibHTS.bam_aux_del(@record.struct, tag_ptr).negative?
    raise "Failed to delete tag '#{key}': errno #{FFI.errno}"
  else
    true
  end
end
164
+
165
# Report whether the record carries the given auxiliary tag.
# For compatibility with HTS.cr.
#
# @param key [String] tag name (2 characters)
# @return [Boolean] true if tag exists
def key?(key)
  found = LibHTS.bam_aux_get(@record.struct, key)
  found.null? ? false : true
end

alias include? key?
175
+
58
176
  def first
59
177
  aux_ptr = first_pointer
60
178
  return nil if aux_ptr.null?
@@ -0,0 +1,343 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTS
4
+ class Bam < Hts
5
# Base modification information from MM/ML tags
#
# This class provides access to DNA/RNA base modifications such as methylation.
# It wraps the htslib base modification API and provides a Ruby-friendly interface.
#
# @note BaseMod is a view object that references data in a Record.
#   The state is maintained in hts_base_mod_state structure.
# @note Once #close has been called the object can no longer be used;
#   methods that need the htslib state raise Error.
class BaseMod
  include Enumerable

  # Raised when data is requested before parsing and auto_parse is disabled.
  class NotParsedError < StandardError; end

  attr_reader :record

  # Individual base modification information
  class Modification
    attr_reader :modified_base, :canonical_base, :strand, :qual

    # @param modified_base [Integer] Modification code as char or -ChEBI
    # @param canonical_base [Integer] Canonical base (A, C, G, T, N)
    # @param strand [Integer] 0 or 1 for +/- strand
    # @param qual [Integer] Quality (256*probability), or a negative value
    #   when no likelihood is available (htslib uses -1 for "unknown" and
    #   -2 for "unchecked")
    def initialize(modified_base:, canonical_base:, strand:, qual:)
      @modified_base = modified_base
      @canonical_base = canonical_base
      @strand = strand
      @qual = qual
    end

    # Get modification code as character or ChEBI number as string
    # @return [String] Single character code for positive codes; otherwise
    #   the decimal form of the stored value (sign included)
    def code
      @modified_base > 0 ? @modified_base.chr : @modified_base.to_s
    end

    # Get canonical base as character
    # @return [String] Single character (A, C, G, T, N)
    def canonical
      @canonical_base.chr
    end

    # Get likelihood as a probability (0.0-1.0)
    # @return [Float, nil] Probability, or nil when qual is negative
    #   (no likelihood recorded)
    def probability
      # Any negative qual is a sentinel, not a scaled probability; dividing
      # it would produce a meaningless negative number.
      return nil if @qual.negative?

      @qual / 256.0
    end

    # Convert to hash representation
    # @return [Hash] Hash with modification information
    def to_h
      {
        modified_base: @modified_base,
        code: code,
        canonical_base: @canonical_base,
        canonical: canonical,
        strand: @strand,
        qual: @qual,
        probability: probability
      }
    end

    # String representation
    # @return [String] "<canonical>-><code>", followed by the probability
    #   rounded to 3 decimals when one is available
    def to_s
      if @qual >= 0
        "#{canonical}->#{code}(#{probability.round(3)})"
      else
        "#{canonical}->#{code}"
      end
    end

    # Inspect string
    # @return [String] Inspect string
    def inspect
      "#<HTS::Bam::BaseMod::Modification #{self}>"
    end
  end

  # Position-specific modification information
  class Position
    attr_reader :position, :modifications

    # @param position [Integer] Position in query sequence
    # @param modifications [Array<Modification>] Array of modifications at this position
    def initialize(position, modifications)
      @position = position
      @modifications = modifications
    end

    # Check if this position has methylation
    # @return [Boolean] true if any modification is methylation ('m')
    def methylated?
      @modifications.any? { |m| m.code == "m" }
    end

    # Check if this position has hydroxymethylation
    # @return [Boolean] true if any modification is hydroxymethylation ('h')
    def hydroxymethylated?
      @modifications.any? { |m| m.code == "h" }
    end

    # Convert to hash representation
    # @return [Hash] Hash with position information
    def to_h
      {
        position: @position,
        modifications: @modifications.map(&:to_h)
      }
    end

    # String representation
    # @return [String] String representation
    def to_s
      mods_str = @modifications.map(&:to_s).join(", ")
      "pos=#{@position} [#{mods_str}]"
    end

    # Inspect string
    # @return [String] Inspect string
    def inspect
      "#<HTS::Bam::BaseMod::Position #{self}>"
    end
  end

  # Initialize a new BaseMod object
  # @param record [Record] The BAM record to extract modifications from
  # @param auto_parse [Boolean] If true, parse MM/ML lazily on first access
  # @raise [Error] If the htslib state cannot be allocated
  def initialize(record, auto_parse: true)
    @record = record
    @state = LibHTS.hts_base_mod_state_alloc
    @closed = false
    @auto_parse = !!auto_parse
    @parsed = false
    # Flags of the most recent successful parse; reused whenever the
    # iteration state has to be reset.
    @parse_flags = 0
    raise Error, "Failed to allocate hts_base_mod_state" if @state.null?
  end

  # Explicitly free the state
  # @return [void]
  def close
    return if @closed

    # With HtsBaseModState as an AutoPointer, releasing the Ruby object
    # is sufficient. Avoid manual free to prevent double-free.
    @state = nil
    @closed = true
  end

  # Whether #close has been called on this object
  # @return [Boolean]
  def closed?
    @closed
  end

  # Whether this object has parsed MM/ML tags already
  # @return [Boolean]
  def parsed?
    @parsed
  end

  # Ensure MM/ML have been parsed, performing lazy parse if enabled.
  # @param flags [Integer] Parsing flags used if a parse is triggered
  # @return [void]
  # @raise [Error] If the object has been closed
  # @raise [NotParsedError] If nothing was parsed yet and auto_parse is disabled
  def ensure_parsed!(flags = 0)
    ensure_open!
    return if @parsed

    raise NotParsedError, "BaseMod is not parsed. Call #parse first (auto_parse is disabled)." unless @auto_parse

    parse(flags)
  end

  # Parse MM and ML tags from the record
  # @param flags [Integer] Parsing flags (default: 0)
  # @return [Integer] Number of modification types found
  # @raise [Error] If parsing fails or the object has been closed
  def parse(flags = 0)
    ensure_open!

    ret = LibHTS.bam_parse_basemod2(@record.struct, @state, flags)
    raise Error, "Failed to parse base modifications" if ret < 0

    @parse_flags = flags
    @parsed = true
    ret
  end

  # Get modification information at a specific query position
  # @param position [Integer] Query position (0-based)
  # @param max_mods [Integer] Maximum number of modifications to retrieve
  # @return [Position, nil] Position object with modifications, or nil if
  #   htslib reports none (a non-positive return value)
  def at_pos(position, max_mods: 10)
    # Reset state to ensure deterministic results even after prior iteration
    reset_state!

    mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)

    ret = LibHTS.bam_mods_at_qpos(@record.struct, position, @state,
                                  mods_ptr, max_mods)
    return nil if ret <= 0

    # ret may exceed the buffer capacity; only max_mods entries were filled.
    build_position(position, mods_ptr, [ret, max_mods].min)
  end

  # Array-style access to modifications at a position
  # @param position [Integer] Query position (0-based)
  # @return [Position, nil] Position object with modifications, or nil if none
  def [](position)
    at_pos(position)
  end

  # Iterate over all positions with modifications
  # @param max_mods [Integer] Maximum number of modifications per position
  # @yield [Position] Position object for each modified position
  # @return [Enumerator] If no block given
  def each_position(max_mods: 10)
    return enum_for(__method__, max_mods: max_mods) unless block_given?

    # Reset state at the start of iteration to allow repeated enumerations
    reset_state!

    pos_ptr = FFI::MemoryPointer.new(:int)
    mods_ptr = FFI::MemoryPointer.new(LibHTS::HtsBaseMod, max_mods)

    loop do
      ret = LibHTS.bam_next_basemod(@record.struct, @state,
                                    mods_ptr, max_mods, pos_ptr)
      break if ret <= 0

      position = pos_ptr.read_int
      yield build_position(position, mods_ptr, [ret, max_mods].min)
    end
  end

  alias each each_position

  # Get list of modification types present in this record
  # @return [Array<Integer>] Array of modification codes (char code or -ChEBI)
  def modification_types
    ensure_parsed!

    ntype_ptr = FFI::MemoryPointer.new(:int)
    codes_ptr = LibHTS.bam_mods_recorded(@state, ntype_ptr)

    ntype = ntype_ptr.read_int
    return [] if ntype <= 0 || codes_ptr.null?

    codes_ptr.read_array_of_int(ntype)
  end

  alias recorded_types modification_types

  # Query information about a specific modification type by code
  # @param code [Integer, String] Modification code (char code or -ChEBI, or single char string)
  # @return [Hash, nil] Hash with canonical, strand, implicit info, or nil if not found
  def query_type(code)
    ensure_parsed!

    code = code.ord if code.is_a?(String)

    strand_ptr = FFI::MemoryPointer.new(:int)
    implicit_ptr = FFI::MemoryPointer.new(:int)
    canonical_ptr = FFI::MemoryPointer.new(:char, 1)

    ret = LibHTS.bam_mods_query_type(@state, code, strand_ptr,
                                     implicit_ptr, canonical_ptr)
    return nil if ret < 0

    {
      canonical: canonical_ptr.read_char.chr,
      strand: strand_ptr.read_int,
      implicit: implicit_ptr.read_int != 0
    }
  end

  # Query information about i-th modification type
  # @param index [Integer] Modification type index (0-based)
  # @return [Hash, nil] Hash with code, canonical, strand, implicit info,
  #   or nil if htslib rejects the index
  def query_type_at(index)
    ensure_parsed!

    strand_ptr = FFI::MemoryPointer.new(:int)
    implicit_ptr = FFI::MemoryPointer.new(:int)
    canonical_ptr = FFI::MemoryPointer.new(:char, 1)

    ret = LibHTS.bam_mods_queryi(@state, index, strand_ptr,
                                 implicit_ptr, canonical_ptr)
    return nil if ret < 0

    types = modification_types
    {
      code: types[index],
      canonical: canonical_ptr.read_char.chr,
      strand: strand_ptr.read_int,
      implicit: implicit_ptr.read_int != 0
    }
  end

  # Get all modifications as an array
  # @return [Array<Position>] Array of all positions with modifications
  def to_a
    each_position.to_a
  end

  # String representation for debugging
  # @return [String] String representation
  def to_s
    # Never touch the released state from a debugging helper.
    return "#<HTS::Bam::BaseMod (closed)>" if @closed
    return "#<HTS::Bam::BaseMod (not parsed)>" unless @parsed

    mods = []
    each_position do |pos|
      mods << pos.to_s
    end
    "#<HTS::Bam::BaseMod #{mods.join(' ')}>"
  end

  # Inspect string
  # @return [String] Inspect string
  def inspect
    to_s
  end

  private

  # Refuse to hand a released (nil) state to htslib.
  # @raise [Error] If #close has been called
  def ensure_open!
    raise Error, "BaseMod is closed" if @closed
  end

  # Rewind the htslib iteration state. An already parsed object is parsed
  # again with the flags of its last parse so the caller's flags survive
  # the reset; otherwise the lazy-parse rules of #ensure_parsed! apply.
  def reset_state!
    parsed? ? parse(@parse_flags) : ensure_parsed!
  end

  # Build Position object from hts_base_mod array
  # @param position [Integer] Query position
  # @param mods_ptr [FFI::Pointer] Pointer to array of HtsBaseMod structures
  # @param n_mods [Integer] Number of modifications
  # @return [Position] Position object
  def build_position(position, mods_ptr, n_mods)
    modifications = Array.new(n_mods) do |i|
      mod_struct = LibHTS::HtsBaseMod.new(mods_ptr + i * LibHTS::HtsBaseMod.size)

      Modification.new(
        modified_base: mod_struct[:modified_base],
        canonical_base: mod_struct[:canonical_base],
        strand: mod_struct[:strand],
        qual: mod_struct[:qual]
      )
    end

    Position.new(position, modifications)
  end
end
342
+ end
343
+ end
@@ -111,6 +111,23 @@ module HTS
111
111
  name2tid(name)
112
112
  end
113
113
 
114
# Append a @PG (program) record to the header.
#
# Thin convenience wrapper around htslib's variadic sam_hdr_add_pg, which
# (per the upstream description) automatically:
# - Generates a unique ID if the specified one clashes
# - Manages PP (previous program) chains automatically
#
# Every option is passed on as a pair of C strings (key, value), and the
# argument list is closed with a NULL pointer.
#
# @param program_name [String] Name of the program
# @param options [Hash] Key-value pairs for @PG tags (ID, PN, VN, CL, PP, etc.)
# @return [Integer] 0 on success, -1 on failure
#
# @example
#   header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
#   header.add_pg("samtools", VN: "1.15", PP: "bwa")
def add_pg(program_name, **options)
  varargs = options.each_with_object([]) do |(tag, val), acc|
    acc.push(:string, tag.to_s, :string, val.to_s)
  end
  varargs.push(:pointer, FFI::Pointer::NULL)

  LibHTS.sam_hdr_add_pg(@sam_hdr, program_name, *varargs)
end
130
+
114
131
  private
115
132
 
116
133
  def name2tid(name)