htslib 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -7
- data/lib/hts/bam/aux.rb +1 -0
- data/lib/hts/bam/cigar.rb +5 -7
- data/lib/hts/bam/flag.rb +5 -0
- data/lib/hts/bam/header.rb +1 -0
- data/lib/hts/bam/record.rb +7 -4
- data/lib/hts/bam.rb +82 -21
- data/lib/hts/bcf/format.rb +4 -4
- data/lib/hts/bcf/header.rb +1 -0
- data/lib/hts/bcf/info.rb +3 -2
- data/lib/hts/bcf/record.rb +30 -29
- data/lib/hts/bcf.rb +106 -26
- data/lib/hts/faidx.rb +24 -14
- data/lib/hts/hts.rb +49 -2
- data/lib/hts/libhts/bgzf.rb +5 -5
- data/lib/hts/libhts/constants.rb +26 -5
- data/lib/hts/libhts/cram.rb +287 -279
- data/lib/hts/libhts/hfile.rb +29 -11
- data/lib/hts/libhts/hts.rb +158 -25
- data/lib/hts/libhts/sam.rb +683 -94
- data/lib/hts/libhts/sam_funcs.rb +92 -588
- data/lib/hts/libhts/vcf.rb +433 -234
- data/lib/hts/libhts/vcf_funcs.rb +232 -424
- data/lib/hts/libhts.rb +1 -0
- data/lib/hts/tbx.rb +3 -4
- data/lib/hts/version.rb +1 -1
- metadata +3 -3
data/lib/hts/bcf.rb
CHANGED
@@ -9,6 +9,7 @@ require_relative "bcf/format"
|
|
9
9
|
require_relative "bcf/record"
|
10
10
|
|
11
11
|
module HTS
|
12
|
+
# A class for working with VCF, BCF files.
|
12
13
|
class Bcf < Hts
|
13
14
|
include Enumerable
|
14
15
|
|
@@ -26,7 +27,7 @@ module HTS
|
|
26
27
|
file
|
27
28
|
end
|
28
29
|
|
29
|
-
def initialize(file_name, mode = "r", index: nil,
|
30
|
+
def initialize(file_name, mode = "r", index: nil, threads: nil,
|
30
31
|
create_index: false)
|
31
32
|
if block_given?
|
32
33
|
message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
|
@@ -42,32 +43,31 @@ module HTS
|
|
42
43
|
|
43
44
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
44
45
|
|
45
|
-
if threads
|
46
|
-
r = LibHTS.hts_set_threads(@hts_file, threads)
|
47
|
-
raise "Failed to set number of threads: #{threads}" if r < 0
|
48
|
-
end
|
46
|
+
set_threads(threads) if threads
|
49
47
|
|
50
48
|
return if @mode[0] == "w"
|
51
49
|
|
52
50
|
@header = Bcf::Header.new(@hts_file)
|
53
|
-
|
54
51
|
create_index(index) if create_index
|
55
|
-
|
56
52
|
@idx = load_index(index)
|
57
|
-
|
58
53
|
@start_position = tell
|
54
|
+
super # do nothing
|
59
55
|
end
|
60
56
|
|
61
57
|
def create_index(index_name = nil)
|
58
|
+
check_closed
|
59
|
+
|
62
60
|
warn "Create index for #{@file_name} to #{index_name}"
|
63
|
-
if
|
64
|
-
LibHTS.bcf_index_build2(@
|
61
|
+
if index_name
|
62
|
+
LibHTS.bcf_index_build2(@file_name, index_name, -1)
|
65
63
|
else
|
66
|
-
LibHTS.bcf_index_build(@
|
64
|
+
LibHTS.bcf_index_build(@file_name, -1)
|
67
65
|
end
|
68
66
|
end
|
69
67
|
|
70
68
|
def load_index(index_name = nil)
|
69
|
+
check_closed
|
70
|
+
|
71
71
|
if index_name
|
72
72
|
LibHTS.bcf_index_load2(@file_name, index_name)
|
73
73
|
else
|
@@ -76,39 +76,50 @@ module HTS
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def index_loaded?
|
79
|
+
check_closed
|
80
|
+
|
79
81
|
!@idx.null?
|
80
82
|
end
|
81
83
|
|
82
84
|
def write_header
|
83
|
-
|
85
|
+
check_closed
|
84
86
|
|
85
87
|
@header = header.dup
|
86
88
|
LibHTS.hts_set_fai_filename(header, @file_name)
|
87
|
-
LibHTS.bcf_hdr_write(@hts_file, header
|
89
|
+
LibHTS.bcf_hdr_write(@hts_file, header)
|
88
90
|
end
|
89
91
|
|
90
92
|
def write(var)
|
91
|
-
|
93
|
+
check_closed
|
92
94
|
|
93
|
-
var_dup = var.dup
|
95
|
+
var_dup = var.dup
|
94
96
|
LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
|
95
97
|
end
|
96
98
|
|
97
99
|
# Close the current file.
|
98
100
|
|
99
101
|
def nsamples
|
102
|
+
check_closed
|
103
|
+
|
100
104
|
header.nsamples
|
101
105
|
end
|
102
106
|
|
103
107
|
def samples
|
108
|
+
check_closed
|
109
|
+
|
104
110
|
header.samples
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
113
|
+
# Iterate over the records of the file.
#
# Delegates to one of two private iterators and forwards the caller's
# block (or absence of one) unchanged.
#
# @param copy [Boolean] when truthy, iterate with +each_record_copy+;
#   otherwise iterate with +each_record_reuse+.
#   NOTE(review): presumably +copy: true+ yields an independent record per
#   iteration while the default reuses one buffer - confirm against
#   +each_record_copy+.
# @return whatever the selected iterator returns.
def each(copy: false, &block)
  return each_record_copy(&block) if copy

  each_record_reuse(&block)
end
|
120
|
+
|
121
|
+
private def each_record_copy
|
122
|
+
check_closed
|
112
123
|
|
113
124
|
return to_enum(__method__) unless block_given?
|
114
125
|
|
@@ -119,12 +130,10 @@ module HTS
|
|
119
130
|
self
|
120
131
|
end
|
121
132
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
raise IOError, "closed stream" if closed?
|
127
|
-
|
133
|
+
private def each_record_reuse
|
134
|
+
check_closed
|
135
|
+
# Each does not always start at the beginning of the file.
|
136
|
+
# This is the common behavior of IO objects in Ruby.
|
128
137
|
return to_enum(__method__) unless block_given?
|
129
138
|
|
130
139
|
bcf1 = LibHTS.bcf_init
|
@@ -132,5 +141,76 @@ module HTS
|
|
132
141
|
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
133
142
|
self
|
134
143
|
end
|
144
|
+
|
145
|
+
# @!macro [attach] define_getter
|
146
|
+
# @method $1
|
147
|
+
# Get $1 array
|
148
|
+
# @return [Array] the $1 array
|
149
|
+
define_getter :chrom
|
150
|
+
define_getter :pos
|
151
|
+
define_getter :endpos
|
152
|
+
define_getter :id
|
153
|
+
define_getter :ref
|
154
|
+
define_getter :alt
|
155
|
+
define_getter :qual
|
156
|
+
define_getter :filter
|
157
|
+
|
158
|
+
# Collect one INFO value from every remaining record.
#
# The current stream position is remembered before iterating and restored
# afterwards, even when reading a record raises, so the call does not
# disturb the caller's position in the file.
#
# @param key [String, nil] the key passed to each record's +info(key)+.
# @return [Array] the value of +record.info(key)+ for each record yielded
#   by +each+.
# @raise [IOError] if the file is closed (via +check_closed+).
# @raise [NotImplementedError] if +key+ is nil; collecting all INFO fields
#   at once is not supported.
def info(key = nil)
  check_closed
  # Fail before touching the stream: there is nothing to restore yet.
  raise NotImplementedError, "info without a key is not implemented" unless key

  position = tell
  begin
    map { |record| record.info(key) }
  ensure
    # Always put the stream back where the caller left it.
    seek(position)
  end
end
|
171
|
+
|
172
|
+
# Collect one FORMAT value from every remaining record.
#
# The current stream position is remembered before iterating and restored
# afterwards, even when reading a record raises, so the call does not
# disturb the caller's position in the file.
#
# @param key [String, nil] the key passed to each record's +format(key)+.
# @return [Array] the value of +record.format(key)+ for each record
#   yielded by +each+.
# @raise [IOError] if the file is closed (via +check_closed+).
# @raise [NotImplementedError] if +key+ is nil; collecting all FORMAT
#   fields at once is not supported.
def format(key = nil)
  check_closed
  # Fail before touching the stream: there is nothing to restore yet.
  raise NotImplementedError, "format without a key is not implemented" unless key

  position = tell
  begin
    map { |record| record.format(key) }
  ensure
    # Always put the stream back where the caller left it.
    seek(position)
  end
end
|
185
|
+
|
186
|
+
# @!macro [attach] define_iterator
|
187
|
+
# @method each_$1
|
188
|
+
# Get $1 iterator
|
189
|
+
define_iterator :chrom
|
190
|
+
define_iterator :pos
|
191
|
+
define_iterator :endpos
|
192
|
+
define_iterator :id
|
193
|
+
define_iterator :ref
|
194
|
+
define_iterator :alt
|
195
|
+
define_iterator :qual
|
196
|
+
define_iterator :filter
|
197
|
+
|
198
|
+
# Iterate over one INFO value per record.
#
# @param key [String] the key passed to each record's +info(key)+.
# @yield [value] the result of +record.info(key)+ for each record yielded
#   by +each+.
# @return [self] when a block is given.
# @return [Enumerator] when no block is given; the enumerator re-invokes
#   this method with the same +key+.
# @raise [IOError] if the file is closed (via +check_closed+).
def each_info(key)
  check_closed
  # `key` must be forwarded, otherwise the enumerator would call
  # each_info with no arguments and fail with ArgumentError.
  return to_enum(__method__, key) unless block_given?

  each do |record|
    yield record.info(key)
  end
  self
end
|
206
|
+
|
207
|
+
# Iterate over one FORMAT value per record.
#
# @param key [String] the key passed to each record's +format(key)+.
# @yield [value] the result of +record.format(key)+ for each record
#   yielded by +each+.
# @return [self] when a block is given.
# @return [Enumerator] when no block is given; the enumerator re-invokes
#   this method with the same +key+.
# @raise [IOError] if the file is closed (via +check_closed+).
def each_format(key)
  check_closed
  # `key` must be forwarded, otherwise the enumerator would call
  # each_format with no arguments and fail with ArgumentError.
  return to_enum(__method__, key) unless block_given?

  each do |record|
    yield record.format(key)
  end
  self
end
|
135
215
|
end
|
136
216
|
end
|
data/lib/hts/faidx.rb
CHANGED
@@ -6,22 +6,32 @@ module HTS
|
|
6
6
|
class Faidx
|
7
7
|
attr_reader :file_name
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
# Open a FASTA index, optionally scoped to a block.
#
# All arguments are forwarded to +new+. Without a block the new object is
# returned as-is. With a block the object is yielded and +close+ is always
# called afterwards, including when the block raises.
#
# @return the object built by +new+ (already closed when a block was given).
def self.open(*args, **kw)
  faidx = new(*args, **kw) # do not yield
  if block_given?
    begin
      yield faidx
    ensure
      faidx.close
    end
  end
  faidx
end
|
12
20
|
|
13
21
|
def initialize(file_name)
|
22
|
+
if block_given?
|
23
|
+
message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
|
24
|
+
raise message
|
25
|
+
end
|
26
|
+
|
14
27
|
@file_name = file_name
|
15
28
|
@fai = LibHTS.fai_load(@file_name)
|
16
29
|
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
close
|
23
|
-
end
|
24
|
-
end
|
30
|
+
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
31
|
+
end
|
32
|
+
|
33
|
+
def struct
|
34
|
+
@fai
|
25
35
|
end
|
26
36
|
|
27
37
|
def close
|
@@ -29,10 +39,10 @@ module HTS
|
|
29
39
|
end
|
30
40
|
|
31
41
|
# the number of sequences in the index.
|
32
|
-
def
|
42
|
+
def length
|
33
43
|
LibHTS.faidx_nseq(@fai)
|
34
44
|
end
|
35
|
-
alias length
|
45
|
+
alias size length
|
36
46
|
|
37
47
|
# return the length of the requested chromosome.
|
38
48
|
def chrom_size(chrom)
|
@@ -48,10 +58,10 @@ module HTS
|
|
48
58
|
alias chrom_length chrom_size
|
49
59
|
|
50
60
|
# FIXME: naming and syntax
|
51
|
-
def cget; end
|
61
|
+
# def cget; end
|
52
62
|
|
53
63
|
# FIXME: naming and syntax
|
54
|
-
def get; end
|
64
|
+
# def get; end
|
55
65
|
|
56
66
|
# __iter__
|
57
67
|
end
|
data/lib/hts/hts.rb
CHANGED
@@ -3,7 +3,38 @@
|
|
3
3
|
require_relative "../htslib"
|
4
4
|
|
5
5
|
module HTS
|
6
|
+
# A base class for hts files.
|
6
7
|
class Hts
|
8
|
+
class << self
|
9
|
+
private
|
10
|
+
|
11
|
+
def define_getter(name)
|
12
|
+
define_method(name) do
|
13
|
+
check_closed
|
14
|
+
position = tell
|
15
|
+
ary = map(&name)
|
16
|
+
seek(position)
|
17
|
+
ary
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def define_iterator(name)
|
22
|
+
define_method("each_#{name}") do |&block|
|
23
|
+
check_closed
|
24
|
+
return to_enum(__method__) unless block
|
25
|
+
|
26
|
+
each do |record|
|
27
|
+
block.call(record.public_send(name))
|
28
|
+
end
|
29
|
+
self
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def initialize(*args)
|
35
|
+
# do nothing
|
36
|
+
end
|
37
|
+
|
7
38
|
def struct
|
8
39
|
@hts_file
|
9
40
|
end
|
@@ -12,11 +43,11 @@ module HTS
|
|
12
43
|
@hts_file.to_ptr
|
13
44
|
end
|
14
45
|
|
15
|
-
def
|
46
|
+
def file_format
|
16
47
|
LibHTS.hts_get_format(@hts_file)[:format].to_s
|
17
48
|
end
|
18
49
|
|
19
|
-
def
|
50
|
+
def file_format_version
|
20
51
|
v = LibHTS.hts_get_format(@hts_file)[:version]
|
21
52
|
major = v[:major]
|
22
53
|
minor = v[:minor]
|
@@ -38,6 +69,16 @@ module HTS
|
|
38
69
|
@hts_file.nil? || @hts_file.null?
|
39
70
|
end
|
40
71
|
|
72
|
+
# Set the number of extra threads used for reading/writing this file.
#
# Values of zero or below are accepted and ignored: the underlying
# library is only called for a positive count.
#
# @param n [Integer] number of threads.
# @return [self]
# @raise [TypeError] if +n+ is not an Integer.
# @raise [RuntimeError] if +LibHTS.hts_set_threads+ returns a negative
#   status.
def set_threads(n)
  raise TypeError, "threads must be an Integer, got #{n.class}" unless n.is_a?(Integer)

  if n.positive?
    status = LibHTS.hts_set_threads(@hts_file, n)
    raise "Failed to set number of threads: #{n}" if status < 0
  end
  self
end
|
81
|
+
|
41
82
|
def seek(offset)
|
42
83
|
if @hts_file[:is_cram] == 1
|
43
84
|
LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
|
@@ -70,5 +111,11 @@ module HTS
|
|
70
111
|
raise "Cannot rewind: no start position"
|
71
112
|
end
|
72
113
|
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
# Guard used by public methods before touching the underlying file.
#
# @return [nil] when the file is still open.
# @raise [IOError] with the message "closed stream" when +closed?+ is true.
def check_closed
  return unless closed?

  raise IOError, "closed stream"
end
|
73
120
|
end
|
74
121
|
end
|
data/lib/hts/libhts/bgzf.rb
CHANGED
@@ -18,19 +18,19 @@ module HTS
|
|
18
18
|
# Open an existing hFILE stream for reading or writing.
|
19
19
|
attach_function \
|
20
20
|
:bgzf_hopen,
|
21
|
-
[
|
21
|
+
[HFile, :string],
|
22
22
|
BGZF.by_ref
|
23
23
|
|
24
24
|
# Close the BGZF and free all associated resources.
|
25
25
|
attach_function \
|
26
26
|
:bgzf_close,
|
27
|
-
[
|
27
|
+
[HFile],
|
28
28
|
:int
|
29
29
|
|
30
30
|
# Read up to _length_ bytes from the file storing into _data_.
|
31
31
|
attach_function \
|
32
32
|
:bgzf_read,
|
33
|
-
[
|
33
|
+
[HFile, :pointer, :size_t],
|
34
34
|
:ssize_t
|
35
35
|
|
36
36
|
# Write _length_ bytes from _data_ to the file. If no I/O errors occur,
|
@@ -181,7 +181,7 @@ module HTS
|
|
181
181
|
# Load BGZF index from an hFILE
|
182
182
|
attach_function \
|
183
183
|
:bgzf_index_load_hfile,
|
184
|
-
[BGZF,
|
184
|
+
[BGZF, HFile, :string],
|
185
185
|
:int
|
186
186
|
|
187
187
|
# Save BGZF index
|
@@ -193,7 +193,7 @@ module HTS
|
|
193
193
|
# Write a BGZF index to an hFILE
|
194
194
|
attach_function \
|
195
195
|
:bgzf_index_dump_hfile,
|
196
|
-
[BGZF,
|
196
|
+
[BGZF, HFile, :string],
|
197
197
|
:int
|
198
198
|
end
|
199
199
|
end
|
data/lib/hts/libhts/constants.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
+
# Module for working with C HTSlib.
|
4
5
|
module LibHTS
|
5
6
|
typedef :int64, :hts_pos_t
|
6
7
|
typedef :pointer, :bam_plp_auto_f
|
@@ -24,9 +25,9 @@ module HTS
|
|
24
25
|
:f, :pointer # kstream_t
|
25
26
|
end
|
26
27
|
|
27
|
-
#
|
28
|
+
# HFile
|
28
29
|
|
29
|
-
class
|
30
|
+
class HFile < FFI::BitStruct
|
30
31
|
layout \
|
31
32
|
:buffer, :string,
|
32
33
|
:begin, :string,
|
@@ -56,7 +57,7 @@ module HTS
|
|
56
57
|
:uncompressed_block, :pointer,
|
57
58
|
:compressed_block, :pointer,
|
58
59
|
:cache, :pointer,
|
59
|
-
:fp,
|
60
|
+
:fp, HFile.ptr,
|
60
61
|
:mt, :pointer,
|
61
62
|
:idx, :pointer,
|
62
63
|
:idx_build_otf, :int,
|
@@ -189,6 +190,16 @@ module HTS
|
|
189
190
|
)
|
190
191
|
end
|
191
192
|
|
193
|
+
class HtsReglist < FFI::Struct
|
194
|
+
layout \
|
195
|
+
:reg, :string,
|
196
|
+
:intervals, :pointer, # hts_pair_pos_t
|
197
|
+
:tid, :int,
|
198
|
+
:count, :uint32_t,
|
199
|
+
:min_beg, :hts_pos_t,
|
200
|
+
:max_end, :hts_pos_t
|
201
|
+
end
|
202
|
+
|
192
203
|
# HtsFile
|
193
204
|
class SamHdr < FFI::Struct
|
194
205
|
layout \
|
@@ -217,7 +228,7 @@ module HTS
|
|
217
228
|
union_layout(
|
218
229
|
:bgzf, BGZF.ptr,
|
219
230
|
:cram, :pointer, # cram_fd
|
220
|
-
:hfile,
|
231
|
+
:hfile, HFile.ptr
|
221
232
|
),
|
222
233
|
:state, :pointer,
|
223
234
|
:format, HtsFormat,
|
@@ -263,7 +274,7 @@ module HTS
|
|
263
274
|
:n_reg, :int,
|
264
275
|
:beg, :int64,
|
265
276
|
:end, :int64,
|
266
|
-
:reg_list, :pointer,
|
277
|
+
:reg_list, :pointer, # HtsReglist.ptr,
|
267
278
|
:curr_tid, :int,
|
268
279
|
:curr_reg, :int,
|
269
280
|
:curr_intv, :int,
|
@@ -516,5 +527,15 @@ module HTS
|
|
516
527
|
LibHTS.bcf_destroy(ptr) unless ptr.null?
|
517
528
|
end
|
518
529
|
end
|
530
|
+
|
531
|
+
CramContentType = enum(
|
532
|
+
:ct_error, -1,
|
533
|
+
:file_header, 0,
|
534
|
+
:compression_header, 1,
|
535
|
+
:mapped_slice, 2,
|
536
|
+
:unmapped_slice, 3, # cram v1.0 only
|
537
|
+
:external, 4,
|
538
|
+
:core, 5
|
539
|
+
)
|
519
540
|
end
|
520
541
|
end
|