htslib 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +23 -1
- data/lib/hts/bam/auxi.rb +228 -19
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +119 -36
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +85 -102
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +34 -2
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +10 -5
- data/lib/hts/version.rb +1 -1
- metadata +4 -4
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/bam/header.rb
CHANGED
|
@@ -6,6 +6,39 @@ module HTS
|
|
|
6
6
|
class Bam < Hts
|
|
7
7
|
# A class for working with alignment header.
|
|
8
8
|
class Header
|
|
9
|
+
# Readable keyword aliases for @HD tags, paired positionally with their
# two-letter SAM keys.
HD_TAG_MAP = %i[version sort_order group_order subsorting]
             .zip(%w[VN SO GO SS])
             .to_h
             .freeze
|
|
15
|
+
|
|
16
|
+
# Readable keyword aliases for @SQ tags, paired positionally with their
# two-letter SAM keys.
SQ_TAG_MAP = %i[name length assembly md5 species uri alt_names]
             .zip(%w[SN LN AS M5 SP UR AN])
             .to_h
             .freeze
|
|
25
|
+
|
|
26
|
+
# Readable keyword aliases for @RG tags, mapped to their two-letter SAM keys.
RG_TAG_MAP = {
  id: "ID",
  barcode: "BC",
  sample: "SM",
  library: "LB",
  platform: "PL",
  platform_unit: "PU",
  center: "CN",
  description: "DS",
  date: "DT",
  flow_order: "FO",
  key_sequence: "KS",
  program: "PG",
  insert_size: "PI",
  # In the SAM specification PM is the *platform model*.
  platform_model: "PM",
  # Misnamed legacy alias for PM (molecule topology is the @SQ TP tag);
  # kept so existing callers keep working.
  molecule_topology: "PM"
}.freeze
|
|
41
|
+
|
|
9
42
|
# Build a Header from SAM header text.
#
# @param text [String] SAM header text
# @return [Header] a new instance wrapping the result of LibHTS.sam_hdr_parse
def self.parse(text)
  # sam_hdr_parse expects the length in bytes. String#size counts characters
  # and under-reports text containing multibyte characters, which would
  # truncate the parsed header.
  new(LibHTS.sam_hdr_parse(text.bytesize, text))
end
|
|
@@ -66,6 +99,11 @@ module HTS
|
|
|
66
99
|
add_lines(...)
|
|
67
100
|
end
|
|
68
101
|
|
|
102
|
+
# Append one header line and return self so calls can be chained.
# The argument is stringified and newline-terminated before being passed
# to add_lines.
def append(line)
  terminated = ensure_newline(line.to_s)
  add_lines(terminated)
  self
end
|
|
106
|
+
|
|
69
107
|
# experimental
|
|
70
108
|
def <<(obj)
|
|
71
109
|
case obj
|
|
@@ -81,15 +119,33 @@ module HTS
|
|
|
81
119
|
# experimental
|
|
82
120
|
# Look up a header line through LibHTS.sam_hdr_find_line_id.
#
# @return [String, nil] copy of the line text when the call returns 0, else nil
def find_line(type, key, value)
  buffer = LibHTS::KString.new
  begin
    status = LibHTS.sam_hdr_find_line_id(@sam_hdr, type, key, value, buffer)
    buffer.read_string_copy if status == 0
  ensure
    # Always release the KString buffer, whether or not the lookup succeeded.
    buffer.free_buffer
  end
end
|
|
129
|
+
|
|
130
|
+
# Look up one tag of a header line through LibHTS.sam_hdr_find_tag_id.
#
# @return [String, nil] copy of the tag value when the call returns 0, else nil
def find_tag(type, id_key, id_value, key)
  buffer = LibHTS::KString.new
  begin
    status = LibHTS.sam_hdr_find_tag_id(@sam_hdr, type, id_key, id_value, key, buffer)
    buffer.read_string_copy if status == 0
  ensure
    # Always release the KString buffer, whether or not the lookup succeeded.
    buffer.free_buffer
  end
end
|
|
87
139
|
|
|
88
140
|
# experimental
|
|
89
141
|
# Fetch the header line of the given type at position +pos+ through
# LibHTS.sam_hdr_find_line_pos.
#
# @return [String, nil] copy of the line text when the call returns 0, else nil
def find_line_at(type, pos)
  buffer = LibHTS::KString.new
  begin
    status = LibHTS.sam_hdr_find_line_pos(@sam_hdr, type, pos, buffer)
    buffer.read_string_copy if status == 0
  ensure
    # Always release the KString buffer, whether or not the lookup succeeded.
    buffer.free_buffer
  end
end
|
|
94
150
|
|
|
95
151
|
# experimental
|
|
@@ -102,6 +158,26 @@ module HTS
|
|
|
102
158
|
LibHTS.sam_hdr_remove_line_pos(@sam_hdr, type, pos)
|
|
103
159
|
end
|
|
104
160
|
|
|
161
|
+
# Remove a header line through LibHTS.sam_hdr_remove_line_id.
#
# @return [Boolean] true when the call returns 0
def delete_line(type, key = nil, value = nil)
  status = LibHTS.sam_hdr_remove_line_id(@sam_hdr, type, key, value)
  status == 0
end
|
|
164
|
+
|
|
165
|
+
# Remove one tag from a header line through LibHTS.sam_hdr_remove_tag_id.
#
# @return [Boolean] true only when the call returns exactly 1
def delete_tag(type, id_key, id_value, key)
  outcome = LibHTS.sam_hdr_remove_tag_id(@sam_hdr, type, id_key, id_value, key)
  outcome == 1
end
|
|
168
|
+
|
|
169
|
+
# Thin wrapper: returns the result of LibHTS.sam_hdr_count_lines unchanged.
def count_lines(type) = LibHTS.sam_hdr_count_lines(@sam_hdr, type)
|
|
172
|
+
|
|
173
|
+
# Thin wrapper: returns the result of LibHTS.sam_hdr_line_index unchanged.
def line_index(type, key) = LibHTS.sam_hdr_line_index(@sam_hdr, type, key)
|
|
176
|
+
|
|
177
|
+
# Thin wrapper: returns the result of LibHTS.sam_hdr_line_name unchanged.
def line_name(type, pos) = LibHTS.sam_hdr_line_name(@sam_hdr, type, pos)
|
|
180
|
+
|
|
105
181
|
# Header text as returned by LibHTS.sam_hdr_str.
def to_s = LibHTS.sam_hdr_str(@sam_hdr)
|
|
@@ -111,6 +187,46 @@ module HTS
|
|
|
111
187
|
name2tid(name)
|
|
112
188
|
end
|
|
113
189
|
|
|
190
|
+
# Merge the given tags into the @HD line (starting from an empty tag list when
# no @HD line is found) and write the result back.
#
# @param tags [Hash] friendly names from HD_TAG_MAP or raw tag keys
# @return [self]
def update_hd(**tags)
  current = find_line_pairs("HD", nil, nil)
  merged = merge_sam_pairs(current, normalize_hd_tags(tags))
  replace_sam_line("HD", nil, nil, merged, %w[VN SO GO SS])
  self
end
|
|
195
|
+
|
|
196
|
+
# Append a new @SQ line.
#
# @param name [#to_s] value for the SN tag
# @param length [#to_s] value for the LN tag
# @param tags [Hash] further tags (friendly names from SQ_TAG_MAP or raw keys)
# @return [self]
def add_sq(name, length:, **tags)
  required = [["SN", name.to_s], ["LN", length.to_s]]
  add_structured_sam_line("SQ", required + normalize_sq_tags(tags), %w[SN LN AS M5 SP UR AN])
  self
end
|
|
202
|
+
|
|
203
|
+
# Merge tags into the existing @SQ line whose SN equals +name+ and write it back.
# SN is passed as a protected key to the merge step.
#
# @return [self]
# @raise [ArgumentError] when no matching @SQ line is found
def update_sq(name, **tags)
  updates = normalize_sq_tags(tags)
  merged = merge_identified_sam_line("SQ", "SN", name.to_s, updates, protected_keys: ["SN"])
  replace_sam_line("SQ", "SN", name.to_s, merged, %w[SN LN AS M5 SP UR AN])
  self
end
|
|
208
|
+
|
|
209
|
+
# Delete the @SQ line whose SN equals +name+; returns delete_line's result.
def remove_sq(name) = delete_line("SQ", "SN", name.to_s)
|
|
212
|
+
|
|
213
|
+
# Append a new @RG line.
#
# @param id [#to_s] value for the ID tag
# @param tags [Hash] further tags (friendly names from RG_TAG_MAP or raw keys)
# @return [self]
def add_rg(id, **tags)
  all_pairs = [["ID", id.to_s]] + normalize_rg_tags(tags)
  add_structured_sam_line("RG", all_pairs, %w[ID SM LB PL PU CN DS DT FO KS PG PI PM])
  self
end
|
|
219
|
+
|
|
220
|
+
# Merge tags into the existing @RG line whose ID equals +id+ and write it back.
# ID is passed as a protected key to the merge step.
#
# @return [self]
# @raise [ArgumentError] when no matching @RG line is found
def update_rg(id, **tags)
  updates = normalize_rg_tags(tags)
  merged = merge_identified_sam_line("RG", "ID", id.to_s, updates, protected_keys: ["ID"])
  replace_sam_line("RG", "ID", id.to_s, merged, %w[ID SM LB PL PU CN DS DT FO KS PG PI PM])
  self
end
|
|
225
|
+
|
|
226
|
+
# Delete the @RG line whose ID equals +id+; returns delete_line's result.
def remove_rg(id) = delete_line("RG", "ID", id.to_s)
|
|
229
|
+
|
|
114
230
|
# Add a @PG (program) line to the header
|
|
115
231
|
# @param program_name [String] Name of the program
|
|
116
232
|
# @param options [Hash] Key-value pairs for @PG tags (ID, PN, VN, CL, PP, etc.)
|
|
@@ -124,12 +240,183 @@ module HTS
|
|
|
124
240
|
# header.add_pg("bwa", VN: "0.7.17", CL: "bwa mem ref.fa read.fq")
|
|
125
241
|
# header.add_pg("samtools", VN: "1.15", PP: "bwa")
|
|
126
242
|
def add_pg(program_name, **options)
  pg_line = build_pg_line(program_name.to_s, options)
  # The length handed to the C side is the byte length of the line.
  status = LibHTS.sam_hdr_add_lines(@sam_hdr, pg_line, pg_line.bytesize)
  raise "Failed to add @PG line" if status.negative?

  self
end
|
|
130
249
|
|
|
131
250
|
private
|
|
132
251
|
|
|
252
|
+
# Normalize keyword tags using the @HD alias table.
def normalize_hd_tags(tags) = normalize_sam_tags(tags, HD_TAG_MAP)
|
|
255
|
+
|
|
256
|
+
# Normalize keyword tags using the @SQ alias table.
def normalize_sq_tags(tags) = normalize_sam_tags(tags, SQ_TAG_MAP)
|
|
259
|
+
|
|
260
|
+
# Normalize keyword tags using the @RG alias table.
def normalize_rg_tags(tags) = normalize_sam_tags(tags, RG_TAG_MAP)
|
|
263
|
+
|
|
264
|
+
# Turn keyword-style tags into [SAM key, string value] pairs.
#
# Keys found in +tag_map+ are translated; any other key is upcased as-is.
# Array values are joined with commas, everything else goes through to_s.
#
# @param tags [Hash] tag name => value
# @param tag_map [Hash{Symbol=>String}] alias table
# @return [Array<Array(String, String)>]
# @raise [ArgumentError] on an empty key or a value containing tab/CR/LF
def normalize_sam_tags(tags, tag_map)
  tags.map do |name, raw|
    sam_key = tag_map.fetch(name.to_sym, name.to_s.upcase)
    sam_value = raw.is_a?(Array) ? raw.join(",") : raw.to_s
    raise ArgumentError, "Header tag keys must not be empty" if sam_key.empty?

    # Tabs and line breaks are field/record separators in the header text.
    if ["\t", "\n", "\r"].any? { |separator| sam_value.include?(separator) }
      raise ArgumentError, "Header tag values must not contain tabs or newlines"
    end

    [sam_key, sam_value]
  end
end
|
|
276
|
+
|
|
277
|
+
# Split one header line into [key, value] pairs, dropping the leading
# record-type field. Each field is split on its first ":" only, so values may
# contain colons; a field without ":" yields an empty-string value.
#
# @param line [#to_s]
# @return [Array<Array>]
def parse_sam_pairs(line)
  fields = line.to_s.chomp.split("\t").drop(1)
  fields.map do |field|
    tag, payload = field.split(":", 2)
    [tag, payload.to_s]
  end
end
|
|
283
|
+
|
|
284
|
+
# Parsed [key, value] pairs of the matching header line, or an empty array
# when find_line returns nothing.
def find_line_pairs(type, id_key, id_value)
  found = find_line(type, id_key, id_value)
  return [] unless found

  parse_sam_pairs(found)
end
|
|
288
|
+
|
|
289
|
+
# Locate an existing header line by its identifying tag and merge +updates+
# into its parsed pairs.
#
# @return [Array<Array>] merged [key, value] pairs
# @raise [ArgumentError] when find_line returns no line
def merge_identified_sam_line(type, id_key, id_value, updates, protected_keys: [])
  current = find_line(type, id_key, id_value) ||
            raise(ArgumentError, "Header line not found: @#{type} #{id_key}:#{id_value}")

  merge_sam_pairs(parse_sam_pairs(current), updates, protected_keys:)
end
|
|
295
|
+
|
|
296
|
+
# Merge +updates+ into a copy of +existing_pairs+.
#
# An update whose key already exists replaces that pair in place; a new key is
# appended. +existing_pairs+ itself is not modified.
#
# Protected keys can never change: an update that repeats the current value is
# skipped as a no-op, any other value raises.
#
# @param existing_pairs [Array<Array(String, String)>]
# @param updates [Array<Array(String, String)>]
# @param protected_keys [Array<String>] keys whose value must stay as it is
# @return [Array<Array(String, String)>] merged pairs
# @raise [ArgumentError] when an update would change a protected key
def merge_sam_pairs(existing_pairs, updates, protected_keys: [])
  pairs = existing_pairs.map(&:dup)
  updates.each do |key, value|
    if protected_keys.include?(key)
      # The previous `unless none?` check was inverted: it raised for an
      # unchanged value and silently dropped a real change.
      unchanged = existing_pairs.any? { |pair| pair[0] == key && pair[1] == value }
      raise ArgumentError, "Header tag #{key} cannot be updated" unless unchanged

      next
    end

    index = pairs.index { |pair| pair[0] == key }
    if index
      pairs[index] = [key, value]
    else
      pairs << [key, value]
    end
  end
  pairs
end
|
|
316
|
+
|
|
317
|
+
# Render the pairs as one header line and append it.
def add_structured_sam_line(type, pairs, preferred_order)
  rendered = build_sam_line(type, pairs, preferred_order)
  append(rendered)
end
|
|
320
|
+
|
|
321
|
+
# Replace a header line: delete the identified line first, then append the
# rebuilt one.
def replace_sam_line(type, id_key, id_value, pairs, preferred_order)
  delete_line(type, id_key, id_value)
  rebuilt = build_sam_line(type, pairs, preferred_order)
  append(rebuilt)
end
|
|
325
|
+
|
|
326
|
+
# Render [key, value] pairs as a single newline-terminated header line.
#
# Tags listed in +preferred_order+ come first, in that order (first matching
# pair per key); all remaining pairs follow in their original order.
#
# @return [String] "@TYPE\tK1:V1\tK2:V2...\n"
def build_sam_line(type, pairs, preferred_order)
  leading = preferred_order.filter_map do |tag|
    pairs.find { |candidate| candidate[0] == tag }
  end
  trailing = pairs.reject { |candidate| preferred_order.include?(candidate[0]) }
  fields = (leading + trailing).map { |tag, payload| "#{tag}:#{payload}" }

  "@#{type}\t#{fields.join("\t")}\n"
end
|
|
336
|
+
|
|
337
|
+
# Return +text+ itself when it already ends in "\n", otherwise a new string
# with "\n" appended.
def ensure_newline(text)
  return text if text.end_with?("\n")

  "#{text}\n"
end
|
|
340
|
+
|
|
341
|
+
# Render a complete, newline-terminated @PG line from the normalized tags.
def build_pg_line(program_name, options)
  fields = normalize_pg_tags(program_name, options).map { |tag, payload| "#{tag}:#{payload}" }
  "@PG\t#{fields.join("\t")}\n"
end
|
|
345
|
+
|
|
346
|
+
# Validate @PG options and return them as ordered [key, value] pairs:
# ID first, PN second (defaulting to +program_name+), then every other option
# in the order it was given.
#
# @raise [ArgumentError] from the tag, ID and parent (PP) validators
def normalize_pg_tags(program_name, options)
  known_ids = pg_ids

  tag_map = {}
  options.each do |name, raw|
    tag = name.to_s
    payload = raw.to_s
    validate_pg_tag(tag, payload)
    tag_map[tag] = payload
  end

  pg_id = resolve_pg_id(program_name, tag_map, known_ids)
  validate_pg_parent(tag_map["PP"], known_ids)

  head = [["ID", pg_id], ["PN", tag_map.fetch("PN", program_name)]]
  # ID and PN were emitted above; keep the rest in insertion order.
  head + tag_map.reject { |tag, _| %w[ID PN].include?(tag) }.to_a
end
|
|
368
|
+
|
|
369
|
+
# Reject an empty @PG tag key or a value containing a tab, LF or CR.
#
# @return [nil]
# @raise [ArgumentError]
def validate_pg_tag(key, value)
  raise ArgumentError, "PG tag keys must not be empty" if key.empty?

  has_separator = ["\t", "\n", "\r"].any? { |separator| value.include?(separator) }
  raise ArgumentError, "PG tag values must not contain tabs or newlines" if has_separator
end
|
|
375
|
+
|
|
376
|
+
# Decide the ID for a new @PG line: an explicit "ID" option is used as given
# (and must not already exist); otherwise one is derived from the program name.
#
# @raise [ArgumentError] when the explicit ID is already present
def resolve_pg_id(program_name, tag_map, existing_ids)
  requested = tag_map["ID"]
  return next_pg_id(program_name, existing_ids) unless requested

  raise ArgumentError, "PG ID already exists: #{requested}" if existing_ids.include?(requested)

  requested
end
|
|
386
|
+
|
|
387
|
+
# Ensure a PP (previous program) reference, when given, names a known @PG ID.
#
# @return [nil]
# @raise [ArgumentError] when +parent_id+ is not among +existing_ids+
def validate_pg_parent(parent_id, existing_ids)
  return if !parent_id || existing_ids.include?(parent_id)

  raise ArgumentError, "Unknown PG parent: #{parent_id}"
end
|
|
393
|
+
|
|
394
|
+
# First unused @PG ID for a program: the bare name if it is free, otherwise
# "name.1", "name.2", ... taking the lowest free suffix.
def next_pg_id(program_name, existing_ids)
  return program_name unless existing_ids.include?(program_name)

  free_suffix = (1..).find { |n| !existing_ids.include?("#{program_name}.#{n}") }
  "#{program_name}.#{free_suffix}"
end
|
|
403
|
+
|
|
404
|
+
# Collect the ID of every @PG line in the current header text, in order.
# Only the first ID field of a line counts; @PG lines without one are skipped.
#
# @return [Array<String>]
def pg_ids
  to_s.each_line.filter_map do |line|
    next unless line.start_with?("@PG\t")

    id_field = line.chomp.split("\t").drop(1).find do |field|
      tag, payload = field.split(":", 2)
      tag == "ID" && payload
    end
    id_field&.split(":", 2)&.last
  end
end
|
|
419
|
+
|
|
133
420
|
# Thin wrapper: returns the result of LibHTS.sam_hdr_name2tid unchanged.
def name2tid(name) = LibHTS.sam_hdr_name2tid(@sam_hdr, name)
|
data/lib/hts/bam/mpileup.rb
CHANGED
|
@@ -44,6 +44,7 @@ module HTS
|
|
|
44
44
|
n = @bams.length
|
|
45
45
|
@iters = []
|
|
46
46
|
@data_blocks = [] # per-input packed pointers kept alive
|
|
47
|
+
@data_entries = {}
|
|
47
48
|
|
|
48
49
|
# Prepare optional region iterators for each input
|
|
49
50
|
@bams.each_with_index do |bam, i|
|
|
@@ -66,7 +67,8 @@ module HTS
|
|
|
66
67
|
end
|
|
67
68
|
|
|
68
69
|
# Build per-input packed pointer blocks so C passes them back to the callback.
|
|
69
|
-
#
|
|
70
|
+
# Keep the Ruby FFI structs in @data_entries to avoid rebuilding wrappers
|
|
71
|
+
# in the per-record callback.
|
|
70
72
|
ptr_size = FFI.type_size(:pointer)
|
|
71
73
|
data_array = FFI::MemoryPointer.new(:pointer, n)
|
|
72
74
|
@bams.each_with_index do |bam, i|
|
|
@@ -78,18 +80,16 @@ module HTS
|
|
|
78
80
|
block.put_pointer(1 * ptr_size, hdr_struct)
|
|
79
81
|
block.put_pointer(2 * ptr_size, itr && !itr.null? ? itr : FFI::Pointer::NULL)
|
|
80
82
|
@data_blocks << block
|
|
83
|
+
@data_entries[block.address] = [hts_fp, hdr_struct, itr && !itr.null? ? itr : nil]
|
|
81
84
|
data_array.put_pointer(i * ptr_size, block)
|
|
82
85
|
end
|
|
83
86
|
# Keep the array of per-input blocks alive while the C side holds on to them
|
|
84
87
|
@data_array = data_array
|
|
85
88
|
|
|
86
89
|
@cb = FFI::Function.new(:int, %i[pointer pointer]) do |data, b|
|
|
87
|
-
|
|
88
|
-
hts_fp = data.get_pointer(0 * ptr_size)
|
|
89
|
-
hdr_struct = data.get_pointer(1 * ptr_size)
|
|
90
|
-
itr = data.get_pointer(2 * ptr_size)
|
|
90
|
+
hts_fp, hdr_struct, itr = @data_entries.fetch(data.address)
|
|
91
91
|
# HTSlib contract: return same as sam_itr_next/sam_read1 (>= 0 on success, -1 on EOF, < -1 on error)
|
|
92
|
-
if itr
|
|
92
|
+
if itr
|
|
93
93
|
HTS::LibHTS.sam_itr_next(hts_fp, itr, b)
|
|
94
94
|
else
|
|
95
95
|
HTS::LibHTS.sam_read1(hts_fp, hdr_struct, b)
|
|
@@ -162,7 +162,7 @@ module HTS
|
|
|
162
162
|
end
|
|
163
163
|
@iters.clear
|
|
164
164
|
# Keep references to callback and data blocks to prevent GC
|
|
165
|
-
@_keepalive = [@cb, @data_array, *@data_blocks]
|
|
165
|
+
@_keepalive = [@cb, @data_array, @data_entries, *@data_blocks]
|
|
166
166
|
# Close owned bams opened by this object
|
|
167
167
|
@owned_bams.each do |b|
|
|
168
168
|
b.close
|
data/lib/hts/bam/record.rb
CHANGED
|
@@ -37,12 +37,14 @@ module HTS
|
|
|
37
37
|
return if params.all? { |x| x.nil? }
|
|
38
38
|
|
|
39
39
|
if params.all?
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
cigar_array = Cigar.parse(cigar).array
|
|
41
|
+
cigar_pointer = if cigar_array.empty?
|
|
42
|
+
FFI::Pointer::NULL
|
|
43
|
+
else
|
|
44
|
+
FFI::MemoryPointer.new(:uint32, cigar_array.length).tap do |pointer|
|
|
45
|
+
pointer.write_array_of_uint32(cigar_array)
|
|
46
|
+
end
|
|
47
|
+
end
|
|
46
48
|
if qual.is_a?(Array)
|
|
47
49
|
qual = qual.pack("C*")
|
|
48
50
|
elsif qual.is_a?(String)
|
|
@@ -214,11 +216,10 @@ module HTS
|
|
|
214
216
|
end
|
|
215
217
|
|
|
216
218
|
def cigar=(str)
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
r
|
|
221
|
-
raise "bam_parse_cigar failed: #{r}" if r != 0
|
|
219
|
+
case str
|
|
220
|
+
when Cigar, String
|
|
221
|
+
r = LibHTS.bam_parse_cigar(str.to_s, FFI::Pointer::NULL, @bam1)
|
|
222
|
+
raise "bam_parse_cigar failed: #{r}" if r.negative?
|
|
222
223
|
else
|
|
223
224
|
raise ArgumentError, "cigar must be a String or Bam::Cigar"
|
|
224
225
|
end
|
|
@@ -272,8 +273,11 @@ module HTS
|
|
|
272
273
|
SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
|
|
273
274
|
end
|
|
274
275
|
|
|
275
|
-
# Get the base qualities.
|
|
276
|
-
#
|
|
276
|
+
# Get the base qualities as raw PHRED bytes.
|
|
277
|
+
# Ruby has no UInt8 type, so this returns an Array<Integer> with values in 0..255,
|
|
278
|
+
# corresponding to Crystal's Array(UInt8).
|
|
279
|
+
# Use qual_string for the SAM-style ASCII representation.
|
|
280
|
+
# @return [Array<Integer>] base qualities as unsigned bytes
|
|
277
281
|
def qual
|
|
278
282
|
q_ptr = LibHTS.bam_get_qual(@bam1)
|
|
279
283
|
q_ptr.read_array_of_uint8(len)
|
|
@@ -351,9 +355,13 @@ module HTS
|
|
|
351
355
|
# @return [String] a string representation of the alignment.
|
|
352
356
|
def to_s
  buffer = LibHTS::KString.new
  begin
    status = LibHTS.sam_format1(@header.struct, @bam1, buffer)
    raise "Failed to format bam record" if status == -1

    buffer.read_string_copy
  ensure
    # Release the KString buffer on success and on failure alike.
    buffer.free_buffer
  end
end
|
|
358
366
|
|
|
359
367
|
private
|
data/lib/hts/bam.rb
CHANGED
|
@@ -31,6 +31,25 @@ module HTS
|
|
|
31
31
|
file
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
+
# Build an index for +file_name+ through LibHTS.sam_index_build3.
#
# @param file_name [String] file to index
# @param index_name [String, nil] passed through as the index name
# @param min_shift [Integer] passed through to sam_index_build3
# @param threads [Integer] passed through to sam_index_build3
# @param verbose [Boolean] emit a progress message with warn when truthy
# @return [nil] on success
# @raise [RuntimeError] with a message chosen from the returned status code
def self.build_index(file_name, index_name = nil, min_shift = 0, threads = 0, verbose = true)
  if verbose
    destination = index_name ? " to #{index_name}" : ""
    warn "Create index for #{file_name}#{destination}"
  end

  status = LibHTS.sam_index_build3(file_name, index_name, min_shift, threads)
  return if status == 0 # successful

  failures = {
    -1 => "indexing failed",
    -2 => "opening #{file_name} failed",
    -3 => "format not indexable",
    -4 => "failed to create and/or save the index"
  }
  raise failures.fetch(status, "unknown error")
end
|
|
52
|
+
|
|
34
53
|
def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
|
|
35
54
|
build_index: false)
|
|
36
55
|
if block_given?
|
|
@@ -74,22 +93,10 @@ module HTS
|
|
|
74
93
|
@start_position = tell
|
|
75
94
|
end
|
|
76
95
|
|
|
77
|
-
def build_index(index_name = nil, min_shift: 0,
|
|
96
|
+
# Build an index for this file by delegating to the class-level build_index,
# using the instance's thread count (0 when none is set).
#
# @return [self] for method chaining
def build_index(index_name = nil, min_shift: 0, verbose: true)
  check_closed

  thread_count = @nthreads || 0
  self.class.build_index(@file_name, index_name, min_shift, thread_count, verbose)
  self # for method chaining
end
|
|
95
102
|
|
|
@@ -317,15 +324,14 @@ module HTS
|
|
|
317
324
|
check_closed
|
|
318
325
|
return to_enum(__method__) unless block_given?
|
|
319
326
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
end
|
|
327
|
+
bam1 = LibHTS.bam_init1
|
|
328
|
+
record = Record.new(header, bam1)
|
|
329
|
+
yield record.dup while LibHTS.sam_read1(@hts_file, header, bam1) != -1
|
|
324
330
|
self
|
|
325
331
|
end
|
|
326
332
|
|
|
327
333
|
def queryi_reuse(tid, beg, end_, &block)
|
|
328
|
-
return to_enum(__method__,
|
|
334
|
+
return to_enum(__method__, tid, beg, end_) unless block_given?
|
|
329
335
|
|
|
330
336
|
qiter = LibHTS.sam_itr_queryi(@idx, tid, beg, end_)
|
|
331
337
|
raise "Failed to query region: #{tid} #{beg} #{end_}" if qiter.null?
|
|
@@ -369,20 +375,24 @@ module HTS
|
|
|
369
375
|
bam1 = LibHTS.bam_init1
|
|
370
376
|
record = Record.new(header, bam1)
|
|
371
377
|
begin
|
|
372
|
-
|
|
378
|
+
while (slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)) >= 0
|
|
379
|
+
yield record
|
|
380
|
+
end
|
|
381
|
+
raise if slen < -1
|
|
373
382
|
ensure
|
|
374
383
|
LibHTS.hts_itr_destroy(qiter)
|
|
375
384
|
end
|
|
376
385
|
end
|
|
377
386
|
|
|
378
387
|
def query_copy(qiter)
|
|
388
|
+
bam1 = LibHTS.bam_init1
|
|
389
|
+
record = Record.new(header, bam1)
|
|
379
390
|
loop do
|
|
380
|
-
bam1 = LibHTS.bam_init1
|
|
381
391
|
slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
|
|
382
392
|
break if slen == -1
|
|
383
393
|
raise if slen < -1
|
|
384
394
|
|
|
385
|
-
yield
|
|
395
|
+
yield record.dup
|
|
386
396
|
end
|
|
387
397
|
ensure
|
|
388
398
|
LibHTS.hts_itr_destroy(qiter)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
  class Bcf < Hts
    # Root of the Bcf exception hierarchy.
    class Error < HTS::Error; end

    # -- opening, indexing, querying --
    class OpenError < Error; end
    # NOTE: inside HTS::Bcf this constant shadows Ruby's core ::IndexError.
    class IndexError < Error; end
    class MissingIndexError < IndexError; end
    class QueryError < Error; end

    # -- header and sample subsetting --
    class HeaderError < Error; end
    class SubsetError < HeaderError; end
    class UnknownSampleError < SubsetError; end

    # -- common parent of INFO and FORMAT field errors --
    class FieldError < Error; end

    # -- INFO fields --
    class InfoError < FieldError; end
    class InfoTypeError < InfoError; end
    class InfoReadError < InfoError; end
    class InfoUpdateError < InfoError; end
    class UnsupportedInfoOperationError < InfoError; end

    # -- FORMAT fields --
    class FormatError < FieldError; end
    class FormatDefinitionError < FormatError; end
    class FormatTypeError < FormatError; end
    class FormatReadError < FormatError; end
    class FormatUpdateError < FormatError; end
    class UnsupportedFormatOperationError < FormatError; end
  end
end
|