htslib 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +23 -1
- data/lib/hts/bam/auxi.rb +228 -19
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +119 -36
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +85 -102
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +34 -2
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +10 -5
- data/lib/hts/version.rb +1 -1
- metadata +4 -4
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/libhts/constants.rb
CHANGED
|
@@ -12,6 +12,27 @@ module HTS
|
|
|
12
12
|
:l, :size_t,
|
|
13
13
|
:m, :size_t,
|
|
14
14
|
:s, :string
|
|
15
|
+
|
|
16
|
+
def buffer_ptr
|
|
17
|
+
to_ptr.get_pointer(self.class.offset_of(:s))
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def read_string_copy
|
|
21
|
+
ptr = buffer_ptr
|
|
22
|
+
return "" if ptr.null?
|
|
23
|
+
|
|
24
|
+
ptr.read_string(self[:l])
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def free_buffer
|
|
28
|
+
ptr = buffer_ptr
|
|
29
|
+
return if ptr.null?
|
|
30
|
+
|
|
31
|
+
LibHTS.hts_free(ptr)
|
|
32
|
+
to_ptr.put_pointer(self.class.offset_of(:s), FFI::Pointer::NULL)
|
|
33
|
+
self[:l] = 0
|
|
34
|
+
self[:m] = 0
|
|
35
|
+
end
|
|
15
36
|
end
|
|
16
37
|
|
|
17
38
|
class KSeq < FFI::Struct
|
|
@@ -430,6 +451,17 @@ module HTS
|
|
|
430
451
|
:idx, HtsIdx.ptr,
|
|
431
452
|
:dict, :pointer
|
|
432
453
|
|
|
454
|
+
def close
|
|
455
|
+
return if @closed
|
|
456
|
+
|
|
457
|
+
ptr = to_ptr
|
|
458
|
+
unless ptr.null?
|
|
459
|
+
ptr.autorelease = false if ptr.respond_to?(:autorelease=)
|
|
460
|
+
self.class.release(ptr)
|
|
461
|
+
end
|
|
462
|
+
@closed = true
|
|
463
|
+
end
|
|
464
|
+
|
|
433
465
|
def self.release(ptr)
|
|
434
466
|
LibHTS.tbx_destroy(ptr) unless ptr.null?
|
|
435
467
|
end
|
|
@@ -466,7 +498,7 @@ module HTS
|
|
|
466
498
|
:n, :int
|
|
467
499
|
end
|
|
468
500
|
|
|
469
|
-
# Complete textual representation of a header line
|
|
501
|
+
# Complete textual representation of a header line owned by Ruby.
|
|
470
502
|
class BcfHrec < FFI::ManagedStruct
|
|
471
503
|
layout \
|
|
472
504
|
:type, :int,
|
|
@@ -503,7 +535,7 @@ module HTS
|
|
|
503
535
|
class BcfIdinfo < FFI::Struct
|
|
504
536
|
layout \
|
|
505
537
|
:info, [:uint64, 3],
|
|
506
|
-
:hrec, [
|
|
538
|
+
:hrec, [:pointer, 3],
|
|
507
539
|
:id, :int
|
|
508
540
|
end
|
|
509
541
|
|
data/lib/hts/libhts/cram.rb
CHANGED
|
@@ -330,11 +330,6 @@ module HTS
|
|
|
330
330
|
%i[cram_fd cram_block cram_metrics int int],
|
|
331
331
|
:int
|
|
332
332
|
|
|
333
|
-
# attach_function \
|
|
334
|
-
# :cram_compress_block2,
|
|
335
|
-
# %i[cram_fd cram_slice cram_block cram_metrics int int],
|
|
336
|
-
# :int
|
|
337
|
-
|
|
338
333
|
# Creates a new container, specifying the maximum number of slices
|
|
339
334
|
# and records permitted.
|
|
340
335
|
attach_function \
|
data/lib/hts/libhts/fai.rb
CHANGED
|
@@ -48,24 +48,24 @@ module HTS
|
|
|
48
48
|
attach_function \
|
|
49
49
|
:fai_fetch,
|
|
50
50
|
[Faidx, :string, :pointer],
|
|
51
|
-
:
|
|
51
|
+
:pointer
|
|
52
52
|
|
|
53
53
|
# Fetch the sequence in a region
|
|
54
54
|
attach_function \
|
|
55
55
|
:fai_fetch64,
|
|
56
56
|
[Faidx, :string, :pointer],
|
|
57
|
-
:
|
|
57
|
+
:pointer
|
|
58
58
|
|
|
59
59
|
# Fetch the quality string for a region for FASTQ files
|
|
60
60
|
attach_function \
|
|
61
61
|
:fai_fetchqual,
|
|
62
62
|
[Faidx, :string, :pointer],
|
|
63
|
-
:
|
|
63
|
+
:pointer
|
|
64
64
|
|
|
65
65
|
attach_function \
|
|
66
66
|
:fai_fetchqual64,
|
|
67
67
|
[Faidx, :string, :pointer],
|
|
68
|
-
:
|
|
68
|
+
:pointer
|
|
69
69
|
|
|
70
70
|
# Fetch the number of sequences
|
|
71
71
|
attach_function \
|
|
@@ -77,25 +77,25 @@ module HTS
|
|
|
77
77
|
attach_function \
|
|
78
78
|
:faidx_fetch_seq,
|
|
79
79
|
[Faidx, :string, :int, :int, :pointer],
|
|
80
|
-
:
|
|
80
|
+
:pointer
|
|
81
81
|
|
|
82
82
|
# Fetch the sequence in a region
|
|
83
83
|
attach_function \
|
|
84
84
|
:faidx_fetch_seq64,
|
|
85
85
|
[Faidx, :string, :int64, :int64, :pointer],
|
|
86
|
-
:
|
|
86
|
+
:pointer
|
|
87
87
|
|
|
88
88
|
# Fetch the quality string in a region for FASTQ files
|
|
89
89
|
attach_function \
|
|
90
90
|
:faidx_fetch_qual,
|
|
91
91
|
[Faidx, :string, :int, :int, :pointer],
|
|
92
|
-
:
|
|
92
|
+
:pointer
|
|
93
93
|
|
|
94
94
|
# Fetch the quality string in a region for FASTQ files
|
|
95
95
|
attach_function \
|
|
96
96
|
:faidx_fetch_qual64,
|
|
97
97
|
[Faidx, :string, :int64, :int64, :pointer],
|
|
98
|
-
:
|
|
98
|
+
:pointer
|
|
99
99
|
|
|
100
100
|
# Query if sequence is present
|
|
101
101
|
attach_function \
|
|
@@ -121,6 +121,11 @@ module HTS
|
|
|
121
121
|
[Faidx, :string],
|
|
122
122
|
:int
|
|
123
123
|
|
|
124
|
+
attach_function \
|
|
125
|
+
:faidx_seq_len64,
|
|
126
|
+
[Faidx, :string],
|
|
127
|
+
:int64
|
|
128
|
+
|
|
124
129
|
# Parses a region string.
|
|
125
130
|
attach_function \
|
|
126
131
|
:fai_parse_region,
|
data/lib/hts/libhts/hfile.rb
CHANGED
|
@@ -46,10 +46,10 @@ module HTS
|
|
|
46
46
|
|
|
47
47
|
# Report the current stream offset
|
|
48
48
|
def self.htell(fp)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
fp[:offset] + (
|
|
49
|
+
base_ptr = fp.pointer
|
|
50
|
+
begin_ptr = base_ptr.get_pointer(fp.offset_of(:begin))
|
|
51
|
+
buffer_ptr = base_ptr.get_pointer(fp.offset_of(:buffer))
|
|
52
|
+
fp[:offset] + (begin_ptr.address - buffer_ptr.address)
|
|
53
53
|
end
|
|
54
54
|
|
|
55
55
|
# Read from the stream until the delimiter, up to a maximum length
|
data/lib/hts/libhts/hts.rb
CHANGED
data/lib/hts/libhts/sam.rb
CHANGED
|
@@ -382,11 +382,15 @@ module HTS
|
|
|
382
382
|
|
|
383
383
|
# Get the next read from a SAM/BAM/CRAM iterator
|
|
384
384
|
def self.sam_itr_next(htsfp, itr, r)
|
|
385
|
-
#
|
|
385
|
+
raise("#{htsfp[:fn] || 'File'} not BGZF compressed") unless htsfp[:is_bgzf] == 1 || htsfp[:is_cram] == 1
|
|
386
386
|
raise("Null iterator") if itr.null?
|
|
387
387
|
|
|
388
|
-
|
|
389
|
-
|
|
388
|
+
if itr[:multi] == 1
|
|
389
|
+
hts_itr_multi_next(htsfp, itr, r)
|
|
390
|
+
else
|
|
391
|
+
bgzf = htsfp[:is_bgzf] == 1 ? htsfp[:fp][:bgzf] : FFI::Pointer::NULL
|
|
392
|
+
hts_itr_next(bgzf, itr, r, htsfp)
|
|
393
|
+
end
|
|
390
394
|
end
|
|
391
395
|
|
|
392
396
|
attach_function \
|
|
@@ -508,7 +512,7 @@ module HTS
|
|
|
508
512
|
# Append tag data to a bam record
|
|
509
513
|
attach_function \
|
|
510
514
|
:bam_aux_append,
|
|
511
|
-
[Bam1, :string, :
|
|
515
|
+
[Bam1, :string, :char, :int, :pointer],
|
|
512
516
|
:int
|
|
513
517
|
|
|
514
518
|
# Delete tag data from a bam record
|
|
@@ -738,6 +742,18 @@ module HTS
|
|
|
738
742
|
:bam_mods_recorded,
|
|
739
743
|
[HtsBaseModState, :pointer],
|
|
740
744
|
:pointer
|
|
745
|
+
|
|
746
|
+
# Sets the header to the file
|
|
747
|
+
attach_function \
|
|
748
|
+
:sam_hdr_set,
|
|
749
|
+
[HtsFile, SamHdr, :int],
|
|
750
|
+
:int
|
|
751
|
+
|
|
752
|
+
# Get the header from the file pointer
|
|
753
|
+
attach_function \
|
|
754
|
+
:sam_hdr_get,
|
|
755
|
+
[HtsFile],
|
|
756
|
+
SamHdr
|
|
741
757
|
end
|
|
742
758
|
end
|
|
743
759
|
|
data/lib/hts/libhts/vcf.rb
CHANGED
|
@@ -270,12 +270,12 @@ module HTS
|
|
|
270
270
|
attach_function \
|
|
271
271
|
:bcf_hdr_get_hrec,
|
|
272
272
|
[BcfHdr, :int, :string, :string, :string],
|
|
273
|
-
|
|
273
|
+
:pointer
|
|
274
274
|
|
|
275
275
|
# Duplicate a header record
|
|
276
276
|
attach_function \
|
|
277
277
|
:bcf_hrec_dup,
|
|
278
|
-
[
|
|
278
|
+
[:pointer],
|
|
279
279
|
BcfHrec.by_ref
|
|
280
280
|
|
|
281
281
|
# Add a new header record key
|
|
@@ -304,7 +304,7 @@ module HTS
|
|
|
304
304
|
# Free up a header record and associated structures
|
|
305
305
|
attach_function \
|
|
306
306
|
:bcf_hrec_destroy,
|
|
307
|
-
[
|
|
307
|
+
[:pointer],
|
|
308
308
|
:void
|
|
309
309
|
|
|
310
310
|
# Individual record querying and manipulation routines
|
|
@@ -539,15 +539,18 @@ module HTS
|
|
|
539
539
|
[HtsFile],
|
|
540
540
|
:int
|
|
541
541
|
|
|
542
|
-
|
|
542
|
+
attach_variable \
|
|
543
543
|
:bcf_float_vector_end,
|
|
544
|
-
[],
|
|
545
544
|
:uint32
|
|
546
545
|
|
|
547
|
-
|
|
546
|
+
attach_variable \
|
|
548
547
|
:bcf_float_missing,
|
|
549
|
-
[],
|
|
550
548
|
:uint32
|
|
549
|
+
|
|
550
|
+
attach_function \
|
|
551
|
+
:bcf_format_gt_v2,
|
|
552
|
+
[BcfHdr, BcfFmt, :int, KString],
|
|
553
|
+
:int
|
|
551
554
|
end
|
|
552
555
|
end
|
|
553
556
|
|
data/lib/hts/libhts/vcf_funcs.rb
CHANGED
|
@@ -97,6 +97,11 @@ module HTS
|
|
|
97
97
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
|
|
98
98
|
end
|
|
99
99
|
|
|
100
|
+
# Function for updating INFO fields (int64; VCF only)
|
|
101
|
+
def bcf_update_info_int64(hdr, line, key, values, n)
|
|
102
|
+
bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG)
|
|
103
|
+
end
|
|
104
|
+
|
|
100
105
|
# Function for updating INFO fields
|
|
101
106
|
def bcf_update_info_float(hdr, line, key, values, n)
|
|
102
107
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
|
|
@@ -150,9 +155,19 @@ module HTS
|
|
|
150
155
|
0
|
|
151
156
|
end
|
|
152
157
|
|
|
158
|
+
# Macro for setting genotypes correctly
|
|
159
|
+
def bcf_gt_vector_end
|
|
160
|
+
bcf_int32_vector_end
|
|
161
|
+
end
|
|
162
|
+
|
|
153
163
|
# Macro for setting genotypes correctly
|
|
154
164
|
def bcf_gt_is_missing(val)
|
|
155
|
-
(val >> 1
|
|
165
|
+
((val >> 1) == 0 ? 1 : 0)
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Macro for setting genotypes correctly
|
|
169
|
+
def bcf_gt_is_vector_end(val)
|
|
170
|
+
val == bcf_gt_vector_end ? 1 : 0
|
|
156
171
|
end
|
|
157
172
|
|
|
158
173
|
# Macro for setting genotypes correctly
|
|
@@ -175,6 +190,11 @@ module HTS
|
|
|
175
190
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
|
|
176
191
|
end
|
|
177
192
|
|
|
193
|
+
# Get INFO values (int64; VCF only)
|
|
194
|
+
def bcf_get_info_int64(hdr, line, tag, dst, ndst)
|
|
195
|
+
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_LONG)
|
|
196
|
+
end
|
|
197
|
+
|
|
178
198
|
# Get INFO values
|
|
179
199
|
def bcf_get_info_float(hdr, line, tag, dst, ndst)
|
|
180
200
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
|
|
@@ -265,7 +285,16 @@ module HTS
|
|
|
265
285
|
)[:val][:info][type] & 0xf
|
|
266
286
|
end
|
|
267
287
|
|
|
268
|
-
|
|
288
|
+
def bcf_hdr_idinfo_exists(hdr, type, int_id)
|
|
289
|
+
return false if int_id.negative? || int_id >= hdr[:n][LibHTS::BCF_DT_ID]
|
|
290
|
+
|
|
291
|
+
pair = LibHTS::BcfIdpair.new(
|
|
292
|
+
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
|
293
|
+
LibHTS::BcfIdpair.size * int_id # offset
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
!pair[:val].null? && bcf_hdr_id2coltype(hdr, type, int_id) != 0xf
|
|
297
|
+
end
|
|
269
298
|
|
|
270
299
|
# def bcf_hdr_id2hrec
|
|
271
300
|
|
data/lib/hts/tabix.rb
CHANGED
|
@@ -89,8 +89,11 @@ module HTS
|
|
|
89
89
|
def seqnames
|
|
90
90
|
check_closed
|
|
91
91
|
nseq = FFI::MemoryPointer.new(:int)
|
|
92
|
-
LibHTS.tbx_seqnames(@idx, nseq)
|
|
92
|
+
pts = LibHTS.tbx_seqnames(@idx, nseq)
|
|
93
|
+
begin
|
|
93
94
|
pts.read_array_of_pointer(nseq.read_int).map(&:read_string)
|
|
95
|
+
ensure
|
|
96
|
+
LibHTS.hts_free(pts) unless pts.null?
|
|
94
97
|
end
|
|
95
98
|
end
|
|
96
99
|
|
|
@@ -108,9 +111,7 @@ module HTS
|
|
|
108
111
|
def close
|
|
109
112
|
return if closed?
|
|
110
113
|
|
|
111
|
-
|
|
112
|
-
# Do not call tbx_destroy here; the FFI finalizer will
|
|
113
|
-
# release the underlying C struct when @idx becomes unreachable.
|
|
114
|
+
@idx.close if @idx && !@idx.null?
|
|
114
115
|
@idx = nil
|
|
115
116
|
super
|
|
116
117
|
end
|
|
@@ -144,8 +145,12 @@ module HTS
|
|
|
144
145
|
def query_yield(qiter)
|
|
145
146
|
r = LibHTS::KString.new
|
|
146
147
|
begin
|
|
147
|
-
|
|
148
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, r)) >= 0
|
|
149
|
+
yield r.read_string_copy.split("\t")
|
|
150
|
+
end
|
|
151
|
+
raise if slen < -1
|
|
148
152
|
ensure
|
|
153
|
+
r.free_buffer
|
|
149
154
|
LibHTS.hts_itr_destroy(qiter)
|
|
150
155
|
end
|
|
151
156
|
end
|
data/lib/hts/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: htslib
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.2
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: ffi
|
|
@@ -71,13 +71,13 @@ files:
|
|
|
71
71
|
- lib/hts/bam/pileup.rb
|
|
72
72
|
- lib/hts/bam/record.rb
|
|
73
73
|
- lib/hts/bcf.rb
|
|
74
|
+
- lib/hts/bcf/errors.rb
|
|
74
75
|
- lib/hts/bcf/format.rb
|
|
75
76
|
- lib/hts/bcf/header.rb
|
|
76
77
|
- lib/hts/bcf/header_record.rb
|
|
77
78
|
- lib/hts/bcf/info.rb
|
|
78
79
|
- lib/hts/bcf/record.rb
|
|
79
80
|
- lib/hts/faidx.rb
|
|
80
|
-
- lib/hts/faidx/sequence.rb
|
|
81
81
|
- lib/hts/ffi_ext/README.md
|
|
82
82
|
- lib/hts/ffi_ext/pointer.rb
|
|
83
83
|
- lib/hts/ffi_ext/struct.rb
|
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
119
119
|
- !ruby/object:Gem::Version
|
|
120
120
|
version: '0'
|
|
121
121
|
requirements: []
|
|
122
|
-
rubygems_version:
|
|
122
|
+
rubygems_version: 4.0.10
|
|
123
123
|
specification_version: 4
|
|
124
124
|
summary: HTSlib bindings for Ruby
|
|
125
125
|
test_files: []
|
data/lib/hts/faidx/sequence.rb
DELETED
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
module HTS
|
|
2
|
-
class Faidx
|
|
3
|
-
class Sequence
|
|
4
|
-
attr_reader :name, :faidx
|
|
5
|
-
|
|
6
|
-
def initialize(faidx, name)
|
|
7
|
-
raise ArgumentError, "Sequence not found: #{name}" unless faidx.has_key?(name)
|
|
8
|
-
|
|
9
|
-
@faidx = faidx
|
|
10
|
-
@name = name
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def length
|
|
14
|
-
faidx.seq_len(name)
|
|
15
|
-
end
|
|
16
|
-
alias size length
|
|
17
|
-
|
|
18
|
-
def seq(start = nil, stop = nil)
|
|
19
|
-
faidx.seq(name, start, stop)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def qual(start = nil, stop = nil)
|
|
23
|
-
faidx.qual(name, start, stop)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def [](arg)
|
|
27
|
-
case arg
|
|
28
|
-
when Integer
|
|
29
|
-
if arg >= 0
|
|
30
|
-
start = arg
|
|
31
|
-
stop = arg
|
|
32
|
-
else
|
|
33
|
-
start = length + arg
|
|
34
|
-
stop = length + arg
|
|
35
|
-
end
|
|
36
|
-
when Range
|
|
37
|
-
arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
|
|
38
|
-
arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
|
|
39
|
-
if arg.begin.nil?
|
|
40
|
-
if arg.end.nil?
|
|
41
|
-
start = nil
|
|
42
|
-
stop = nil
|
|
43
|
-
else
|
|
44
|
-
start = 0
|
|
45
|
-
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
|
46
|
-
end
|
|
47
|
-
elsif arg.end.nil?
|
|
48
|
-
# always include the first base
|
|
49
|
-
start = arg.begin
|
|
50
|
-
stop = length - 1
|
|
51
|
-
else
|
|
52
|
-
start = arg.begin
|
|
53
|
-
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
|
54
|
-
end
|
|
55
|
-
else
|
|
56
|
-
raise ArgumentError
|
|
57
|
-
end
|
|
58
|
-
seq(start, stop)
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
end
|