htslib 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/TUTORIAL.md +67 -0
- data/lib/hts/bam/auxi.rb +329 -2
- data/lib/hts/bam/cigar.rb +10 -2
- data/lib/hts/bam/header.rb +293 -6
- data/lib/hts/bam/mpileup.rb +7 -7
- data/lib/hts/bam/record.rb +23 -15
- data/lib/hts/bam.rb +32 -22
- data/lib/hts/bcf/errors.rb +27 -0
- data/lib/hts/bcf/format.rb +386 -32
- data/lib/hts/bcf/header.rb +320 -13
- data/lib/hts/bcf/header_record.rb +6 -2
- data/lib/hts/bcf/info.rb +269 -28
- data/lib/hts/bcf/record.rb +9 -5
- data/lib/hts/bcf.rb +163 -34
- data/lib/hts/faidx.rb +110 -73
- data/lib/hts/hts.rb +4 -1
- data/lib/hts/libhts/constants.rb +41 -3
- data/lib/hts/libhts/cram.rb +0 -5
- data/lib/hts/libhts/fai.rb +13 -8
- data/lib/hts/libhts/hfile.rb +4 -4
- data/lib/hts/libhts/hts.rb +6 -0
- data/lib/hts/libhts/sam.rb +20 -4
- data/lib/hts/libhts/vcf.rb +10 -7
- data/lib/hts/libhts/vcf_funcs.rb +31 -2
- data/lib/hts/tabix.rb +29 -2
- data/lib/hts/version.rb +1 -1
- metadata +3 -3
- data/lib/hts/faidx/sequence.rb +0 -62
data/lib/hts/libhts/fai.rb
CHANGED
|
@@ -48,24 +48,24 @@ module HTS
|
|
|
48
48
|
attach_function \
|
|
49
49
|
:fai_fetch,
|
|
50
50
|
[Faidx, :string, :pointer],
|
|
51
|
-
:
|
|
51
|
+
:pointer
|
|
52
52
|
|
|
53
53
|
# Fetch the sequence in a region
|
|
54
54
|
attach_function \
|
|
55
55
|
:fai_fetch64,
|
|
56
56
|
[Faidx, :string, :pointer],
|
|
57
|
-
:
|
|
57
|
+
:pointer
|
|
58
58
|
|
|
59
59
|
# Fetch the quality string for a region for FASTQ files
|
|
60
60
|
attach_function \
|
|
61
61
|
:fai_fetchqual,
|
|
62
62
|
[Faidx, :string, :pointer],
|
|
63
|
-
:
|
|
63
|
+
:pointer
|
|
64
64
|
|
|
65
65
|
attach_function \
|
|
66
66
|
:fai_fetchqual64,
|
|
67
67
|
[Faidx, :string, :pointer],
|
|
68
|
-
:
|
|
68
|
+
:pointer
|
|
69
69
|
|
|
70
70
|
# Fetch the number of sequences
|
|
71
71
|
attach_function \
|
|
@@ -77,25 +77,25 @@ module HTS
|
|
|
77
77
|
attach_function \
|
|
78
78
|
:faidx_fetch_seq,
|
|
79
79
|
[Faidx, :string, :int, :int, :pointer],
|
|
80
|
-
:
|
|
80
|
+
:pointer
|
|
81
81
|
|
|
82
82
|
# Fetch the sequence in a region
|
|
83
83
|
attach_function \
|
|
84
84
|
:faidx_fetch_seq64,
|
|
85
85
|
[Faidx, :string, :int64, :int64, :pointer],
|
|
86
|
-
:
|
|
86
|
+
:pointer
|
|
87
87
|
|
|
88
88
|
# Fetch the quality string in a region for FASTQ files
|
|
89
89
|
attach_function \
|
|
90
90
|
:faidx_fetch_qual,
|
|
91
91
|
[Faidx, :string, :int, :int, :pointer],
|
|
92
|
-
:
|
|
92
|
+
:pointer
|
|
93
93
|
|
|
94
94
|
# Fetch the quality string in a region for FASTQ files
|
|
95
95
|
attach_function \
|
|
96
96
|
:faidx_fetch_qual64,
|
|
97
97
|
[Faidx, :string, :int64, :int64, :pointer],
|
|
98
|
-
:
|
|
98
|
+
:pointer
|
|
99
99
|
|
|
100
100
|
# Query if sequence is present
|
|
101
101
|
attach_function \
|
|
@@ -121,6 +121,11 @@ module HTS
|
|
|
121
121
|
[Faidx, :string],
|
|
122
122
|
:int
|
|
123
123
|
|
|
124
|
+
attach_function \
|
|
125
|
+
:faidx_seq_len64,
|
|
126
|
+
[Faidx, :string],
|
|
127
|
+
:int64
|
|
128
|
+
|
|
124
129
|
# Parses a region string.
|
|
125
130
|
attach_function \
|
|
126
131
|
:fai_parse_region,
|
data/lib/hts/libhts/hfile.rb
CHANGED
|
@@ -46,10 +46,10 @@ module HTS
|
|
|
46
46
|
|
|
47
47
|
# Report the current stream offset
|
|
48
48
|
def self.htell(fp)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
fp[:offset] + (
|
|
49
|
+
base_ptr = fp.pointer
|
|
50
|
+
begin_ptr = base_ptr.get_pointer(fp.offset_of(:begin))
|
|
51
|
+
buffer_ptr = base_ptr.get_pointer(fp.offset_of(:buffer))
|
|
52
|
+
fp[:offset] + (begin_ptr.address - buffer_ptr.address)
|
|
53
53
|
end
|
|
54
54
|
|
|
55
55
|
# Read from the stream until the delimiter, up to a maximum length
|
data/lib/hts/libhts/hts.rb
CHANGED
data/lib/hts/libhts/sam.rb
CHANGED
|
@@ -382,11 +382,15 @@ module HTS
|
|
|
382
382
|
|
|
383
383
|
# Get the next read from a SAM/BAM/CRAM iterator
|
|
384
384
|
def self.sam_itr_next(htsfp, itr, r)
|
|
385
|
-
#
|
|
385
|
+
raise("#{htsfp[:fn] || 'File'} not BGZF compressed") unless htsfp[:is_bgzf] == 1 || htsfp[:is_cram] == 1
|
|
386
386
|
raise("Null iterator") if itr.null?
|
|
387
387
|
|
|
388
|
-
|
|
389
|
-
|
|
388
|
+
if itr[:multi] == 1
|
|
389
|
+
hts_itr_multi_next(htsfp, itr, r)
|
|
390
|
+
else
|
|
391
|
+
bgzf = htsfp[:is_bgzf] == 1 ? htsfp[:fp][:bgzf] : FFI::Pointer::NULL
|
|
392
|
+
hts_itr_next(bgzf, itr, r, htsfp)
|
|
393
|
+
end
|
|
390
394
|
end
|
|
391
395
|
|
|
392
396
|
attach_function \
|
|
@@ -508,7 +512,7 @@ module HTS
|
|
|
508
512
|
# Append tag data to a bam record
|
|
509
513
|
attach_function \
|
|
510
514
|
:bam_aux_append,
|
|
511
|
-
[Bam1, :string, :
|
|
515
|
+
[Bam1, :string, :char, :int, :pointer],
|
|
512
516
|
:int
|
|
513
517
|
|
|
514
518
|
# Delete tag data from a bam record
|
|
@@ -738,6 +742,18 @@ module HTS
|
|
|
738
742
|
:bam_mods_recorded,
|
|
739
743
|
[HtsBaseModState, :pointer],
|
|
740
744
|
:pointer
|
|
745
|
+
|
|
746
|
+
# Sets the header to the file
|
|
747
|
+
attach_function \
|
|
748
|
+
:sam_hdr_set,
|
|
749
|
+
[HtsFile, SamHdr, :int],
|
|
750
|
+
:int
|
|
751
|
+
|
|
752
|
+
# Get the header from the file pointer
|
|
753
|
+
attach_function \
|
|
754
|
+
:sam_hdr_get,
|
|
755
|
+
[HtsFile],
|
|
756
|
+
SamHdr
|
|
741
757
|
end
|
|
742
758
|
end
|
|
743
759
|
|
data/lib/hts/libhts/vcf.rb
CHANGED
|
@@ -270,12 +270,12 @@ module HTS
|
|
|
270
270
|
attach_function \
|
|
271
271
|
:bcf_hdr_get_hrec,
|
|
272
272
|
[BcfHdr, :int, :string, :string, :string],
|
|
273
|
-
|
|
273
|
+
:pointer
|
|
274
274
|
|
|
275
275
|
# Duplicate a header record
|
|
276
276
|
attach_function \
|
|
277
277
|
:bcf_hrec_dup,
|
|
278
|
-
[
|
|
278
|
+
[:pointer],
|
|
279
279
|
BcfHrec.by_ref
|
|
280
280
|
|
|
281
281
|
# Add a new header record key
|
|
@@ -304,7 +304,7 @@ module HTS
|
|
|
304
304
|
# Free up a header record and associated structures
|
|
305
305
|
attach_function \
|
|
306
306
|
:bcf_hrec_destroy,
|
|
307
|
-
[
|
|
307
|
+
[:pointer],
|
|
308
308
|
:void
|
|
309
309
|
|
|
310
310
|
# Individual record querying and manipulation routines
|
|
@@ -539,15 +539,18 @@ module HTS
|
|
|
539
539
|
[HtsFile],
|
|
540
540
|
:int
|
|
541
541
|
|
|
542
|
-
|
|
542
|
+
attach_variable \
|
|
543
543
|
:bcf_float_vector_end,
|
|
544
|
-
[],
|
|
545
544
|
:uint32
|
|
546
545
|
|
|
547
|
-
|
|
546
|
+
attach_variable \
|
|
548
547
|
:bcf_float_missing,
|
|
549
|
-
[],
|
|
550
548
|
:uint32
|
|
549
|
+
|
|
550
|
+
attach_function \
|
|
551
|
+
:bcf_format_gt_v2,
|
|
552
|
+
[BcfHdr, BcfFmt, :int, KString],
|
|
553
|
+
:int
|
|
551
554
|
end
|
|
552
555
|
end
|
|
553
556
|
|
data/lib/hts/libhts/vcf_funcs.rb
CHANGED
|
@@ -97,6 +97,11 @@ module HTS
|
|
|
97
97
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
|
|
98
98
|
end
|
|
99
99
|
|
|
100
|
+
# Function for updating INFO fields (int64; VCF only)
|
|
101
|
+
def bcf_update_info_int64(hdr, line, key, values, n)
|
|
102
|
+
bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG)
|
|
103
|
+
end
|
|
104
|
+
|
|
100
105
|
# Function for updating INFO fields
|
|
101
106
|
def bcf_update_info_float(hdr, line, key, values, n)
|
|
102
107
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
|
|
@@ -150,9 +155,19 @@ module HTS
|
|
|
150
155
|
0
|
|
151
156
|
end
|
|
152
157
|
|
|
158
|
+
# Macro for setting genotypes correctly
|
|
159
|
+
def bcf_gt_vector_end
|
|
160
|
+
bcf_int32_vector_end
|
|
161
|
+
end
|
|
162
|
+
|
|
153
163
|
# Macro for setting genotypes correctly
|
|
154
164
|
def bcf_gt_is_missing(val)
|
|
155
|
-
(val >> 1
|
|
165
|
+
((val >> 1) == 0 ? 1 : 0)
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Macro for setting genotypes correctly
|
|
169
|
+
def bcf_gt_is_vector_end(val)
|
|
170
|
+
val == bcf_gt_vector_end ? 1 : 0
|
|
156
171
|
end
|
|
157
172
|
|
|
158
173
|
# Macro for setting genotypes correctly
|
|
@@ -175,6 +190,11 @@ module HTS
|
|
|
175
190
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
|
|
176
191
|
end
|
|
177
192
|
|
|
193
|
+
# Get INFO values (int64; VCF only)
|
|
194
|
+
def bcf_get_info_int64(hdr, line, tag, dst, ndst)
|
|
195
|
+
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_LONG)
|
|
196
|
+
end
|
|
197
|
+
|
|
178
198
|
# Get INFO values
|
|
179
199
|
def bcf_get_info_float(hdr, line, tag, dst, ndst)
|
|
180
200
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
|
|
@@ -265,7 +285,16 @@ module HTS
|
|
|
265
285
|
)[:val][:info][type] & 0xf
|
|
266
286
|
end
|
|
267
287
|
|
|
268
|
-
|
|
288
|
+
def bcf_hdr_idinfo_exists(hdr, type, int_id)
|
|
289
|
+
return false if int_id.negative? || int_id >= hdr[:n][LibHTS::BCF_DT_ID]
|
|
290
|
+
|
|
291
|
+
pair = LibHTS::BcfIdpair.new(
|
|
292
|
+
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
|
293
|
+
LibHTS::BcfIdpair.size * int_id # offset
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
!pair[:val].null? && bcf_hdr_id2coltype(hdr, type, int_id) != 0xf
|
|
297
|
+
end
|
|
269
298
|
|
|
270
299
|
# def bcf_hdr_id2hrec
|
|
271
300
|
|
data/lib/hts/tabix.rb
CHANGED
|
@@ -68,6 +68,7 @@ module HTS
|
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
def load_index(index_name = nil)
|
|
71
|
+
check_closed
|
|
71
72
|
if index_name
|
|
72
73
|
LibHTS.tbx_index_load2(@file_name, index_name)
|
|
73
74
|
else
|
|
@@ -76,17 +77,23 @@ module HTS
|
|
|
76
77
|
end
|
|
77
78
|
|
|
78
79
|
def index_loaded?
|
|
80
|
+
check_closed
|
|
79
81
|
!@idx.null?
|
|
80
82
|
end
|
|
81
83
|
|
|
82
84
|
def name2id(name)
|
|
85
|
+
check_closed
|
|
83
86
|
LibHTS.tbx_name2id(@idx, name)
|
|
84
87
|
end
|
|
85
88
|
|
|
86
89
|
def seqnames
|
|
90
|
+
check_closed
|
|
87
91
|
nseq = FFI::MemoryPointer.new(:int)
|
|
88
|
-
LibHTS.tbx_seqnames(@idx, nseq)
|
|
92
|
+
pts = LibHTS.tbx_seqnames(@idx, nseq)
|
|
93
|
+
begin
|
|
89
94
|
pts.read_array_of_pointer(nseq.read_int).map(&:read_string)
|
|
95
|
+
ensure
|
|
96
|
+
LibHTS.hts_free(pts) unless pts.null?
|
|
90
97
|
end
|
|
91
98
|
end
|
|
92
99
|
|
|
@@ -101,6 +108,18 @@ module HTS
|
|
|
101
108
|
end
|
|
102
109
|
end
|
|
103
110
|
|
|
111
|
+
def close
|
|
112
|
+
return if closed?
|
|
113
|
+
|
|
114
|
+
@idx.close if @idx && !@idx.null?
|
|
115
|
+
@idx = nil
|
|
116
|
+
super
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def closed?
|
|
120
|
+
@hts_file.nil? || @hts_file.null?
|
|
121
|
+
end
|
|
122
|
+
|
|
104
123
|
private
|
|
105
124
|
|
|
106
125
|
def queryi(id, start, end_, &block)
|
|
@@ -126,10 +145,18 @@ module HTS
|
|
|
126
145
|
def query_yield(qiter)
|
|
127
146
|
r = LibHTS::KString.new
|
|
128
147
|
begin
|
|
129
|
-
|
|
148
|
+
while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, r)) >= 0
|
|
149
|
+
yield r.read_string_copy.split("\t")
|
|
150
|
+
end
|
|
151
|
+
raise if slen < -1
|
|
130
152
|
ensure
|
|
153
|
+
r.free_buffer
|
|
131
154
|
LibHTS.hts_itr_destroy(qiter)
|
|
132
155
|
end
|
|
133
156
|
end
|
|
157
|
+
|
|
158
|
+
def check_closed
|
|
159
|
+
raise IOError, "closed Tabix" if closed?
|
|
160
|
+
end
|
|
134
161
|
end
|
|
135
162
|
end
|
data/lib/hts/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: htslib
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
@@ -71,13 +71,13 @@ files:
|
|
|
71
71
|
- lib/hts/bam/pileup.rb
|
|
72
72
|
- lib/hts/bam/record.rb
|
|
73
73
|
- lib/hts/bcf.rb
|
|
74
|
+
- lib/hts/bcf/errors.rb
|
|
74
75
|
- lib/hts/bcf/format.rb
|
|
75
76
|
- lib/hts/bcf/header.rb
|
|
76
77
|
- lib/hts/bcf/header_record.rb
|
|
77
78
|
- lib/hts/bcf/info.rb
|
|
78
79
|
- lib/hts/bcf/record.rb
|
|
79
80
|
- lib/hts/faidx.rb
|
|
80
|
-
- lib/hts/faidx/sequence.rb
|
|
81
81
|
- lib/hts/ffi_ext/README.md
|
|
82
82
|
- lib/hts/ffi_ext/pointer.rb
|
|
83
83
|
- lib/hts/ffi_ext/struct.rb
|
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
119
119
|
- !ruby/object:Gem::Version
|
|
120
120
|
version: '0'
|
|
121
121
|
requirements: []
|
|
122
|
-
rubygems_version:
|
|
122
|
+
rubygems_version: 4.0.10
|
|
123
123
|
specification_version: 4
|
|
124
124
|
summary: HTSlib bindings for Ruby
|
|
125
125
|
test_files: []
|
data/lib/hts/faidx/sequence.rb
DELETED
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
module HTS
|
|
2
|
-
class Faidx
|
|
3
|
-
class Sequence
|
|
4
|
-
attr_reader :name, :faidx
|
|
5
|
-
|
|
6
|
-
def initialize(faidx, name)
|
|
7
|
-
raise unless faidx.has_key?(name)
|
|
8
|
-
|
|
9
|
-
@faidx = faidx
|
|
10
|
-
@name = name
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def length
|
|
14
|
-
faidx.seq_len(name)
|
|
15
|
-
end
|
|
16
|
-
alias size length
|
|
17
|
-
|
|
18
|
-
def seq(start = nil, stop = nil)
|
|
19
|
-
faidx.seq(name, start, stop)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def qual(start = nil, stop = nil)
|
|
23
|
-
faidx.qual(name, start, stop)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def [](arg)
|
|
27
|
-
case arg
|
|
28
|
-
when Integer
|
|
29
|
-
if arg >= 0
|
|
30
|
-
start = arg
|
|
31
|
-
stop = arg
|
|
32
|
-
else
|
|
33
|
-
start = length + arg
|
|
34
|
-
stop = length + arg
|
|
35
|
-
end
|
|
36
|
-
when Range
|
|
37
|
-
arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
|
|
38
|
-
arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
|
|
39
|
-
if arg.begin.nil?
|
|
40
|
-
if arg.end.nil?
|
|
41
|
-
start = nil
|
|
42
|
-
stop = nil
|
|
43
|
-
else
|
|
44
|
-
start = 0
|
|
45
|
-
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
|
46
|
-
end
|
|
47
|
-
elsif arg.end.nil?
|
|
48
|
-
# always include the first base
|
|
49
|
-
start = arg.begin
|
|
50
|
-
stop = length - 1
|
|
51
|
-
else
|
|
52
|
-
start = arg.begin
|
|
53
|
-
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
|
54
|
-
end
|
|
55
|
-
else
|
|
56
|
-
raise ArgumentError
|
|
57
|
-
end
|
|
58
|
-
seq(start, stop)
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
end
|