htslib 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -7
- data/lib/hts/bam/aux.rb +1 -0
- data/lib/hts/bam/cigar.rb +5 -7
- data/lib/hts/bam/flag.rb +5 -0
- data/lib/hts/bam/header.rb +1 -0
- data/lib/hts/bam/record.rb +7 -4
- data/lib/hts/bam.rb +82 -21
- data/lib/hts/bcf/format.rb +4 -4
- data/lib/hts/bcf/header.rb +1 -0
- data/lib/hts/bcf/info.rb +3 -2
- data/lib/hts/bcf/record.rb +30 -29
- data/lib/hts/bcf.rb +106 -26
- data/lib/hts/faidx.rb +24 -14
- data/lib/hts/hts.rb +49 -2
- data/lib/hts/libhts/bgzf.rb +5 -5
- data/lib/hts/libhts/constants.rb +26 -5
- data/lib/hts/libhts/cram.rb +287 -279
- data/lib/hts/libhts/hfile.rb +29 -11
- data/lib/hts/libhts/hts.rb +158 -25
- data/lib/hts/libhts/sam.rb +683 -94
- data/lib/hts/libhts/sam_funcs.rb +92 -588
- data/lib/hts/libhts/vcf.rb +433 -234
- data/lib/hts/libhts/vcf_funcs.rb +232 -424
- data/lib/hts/libhts.rb +1 -0
- data/lib/hts/tbx.rb +3 -4
- data/lib/hts/version.rb +1 -1
- metadata +3 -3
data/lib/hts/bcf.rb
CHANGED
@@ -9,6 +9,7 @@ require_relative "bcf/format"
|
|
9
9
|
require_relative "bcf/record"
|
10
10
|
|
11
11
|
module HTS
|
12
|
+
# A class for working with VCF and BCF files.
|
12
13
|
class Bcf < Hts
|
13
14
|
include Enumerable
|
14
15
|
|
@@ -26,7 +27,7 @@ module HTS
|
|
26
27
|
file
|
27
28
|
end
|
28
29
|
|
29
|
-
def initialize(file_name, mode = "r", index: nil,
|
30
|
+
def initialize(file_name, mode = "r", index: nil, threads: nil,
|
30
31
|
create_index: false)
|
31
32
|
if block_given?
|
32
33
|
message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
|
@@ -42,32 +43,31 @@ module HTS
|
|
42
43
|
|
43
44
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
44
45
|
|
45
|
-
if threads
|
46
|
-
r = LibHTS.hts_set_threads(@hts_file, threads)
|
47
|
-
raise "Failed to set number of threads: #{threads}" if r < 0
|
48
|
-
end
|
46
|
+
set_threads(threads) if threads
|
49
47
|
|
50
48
|
return if @mode[0] == "w"
|
51
49
|
|
52
50
|
@header = Bcf::Header.new(@hts_file)
|
53
|
-
|
54
51
|
create_index(index) if create_index
|
55
|
-
|
56
52
|
@idx = load_index(index)
|
57
|
-
|
58
53
|
@start_position = tell
|
54
|
+
super # do nothing
|
59
55
|
end
|
60
56
|
|
61
57
|
def create_index(index_name = nil)
|
58
|
+
check_closed
|
59
|
+
|
62
60
|
warn "Create index for #{@file_name} to #{index_name}"
|
63
|
-
if
|
64
|
-
LibHTS.bcf_index_build2(@
|
61
|
+
if index_name
|
62
|
+
LibHTS.bcf_index_build2(@file_name, index_name, -1)
|
65
63
|
else
|
66
|
-
LibHTS.bcf_index_build(@
|
64
|
+
LibHTS.bcf_index_build(@file_name, -1)
|
67
65
|
end
|
68
66
|
end
|
69
67
|
|
70
68
|
def load_index(index_name = nil)
|
69
|
+
check_closed
|
70
|
+
|
71
71
|
if index_name
|
72
72
|
LibHTS.bcf_index_load2(@file_name, index_name)
|
73
73
|
else
|
@@ -76,39 +76,50 @@ module HTS
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def index_loaded?
|
79
|
+
check_closed
|
80
|
+
|
79
81
|
!@idx.null?
|
80
82
|
end
|
81
83
|
|
82
84
|
def write_header
|
83
|
-
|
85
|
+
check_closed
|
84
86
|
|
85
87
|
@header = header.dup
|
86
88
|
LibHTS.hts_set_fai_filename(header, @file_name)
|
87
|
-
LibHTS.bcf_hdr_write(@hts_file, header
|
89
|
+
LibHTS.bcf_hdr_write(@hts_file, header)
|
88
90
|
end
|
89
91
|
|
90
92
|
def write(var)
|
91
|
-
|
93
|
+
check_closed
|
92
94
|
|
93
|
-
var_dup = var.dup
|
95
|
+
var_dup = var.dup
|
94
96
|
LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
|
95
97
|
end
|
96
98
|
|
97
99
|
# Close the current file.
|
98
100
|
|
99
101
|
def nsamples
|
102
|
+
check_closed
|
103
|
+
|
100
104
|
header.nsamples
|
101
105
|
end
|
102
106
|
|
103
107
|
def samples
|
108
|
+
check_closed
|
109
|
+
|
104
110
|
header.samples
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
113
|
+
def each(copy: false, &block)
|
114
|
+
if copy
|
115
|
+
each_record_copy(&block)
|
116
|
+
else
|
117
|
+
each_record_reuse(&block)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
private def each_record_copy
|
122
|
+
check_closed
|
112
123
|
|
113
124
|
return to_enum(__method__) unless block_given?
|
114
125
|
|
@@ -119,12 +130,10 @@ module HTS
|
|
119
130
|
self
|
120
131
|
end
|
121
132
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
raise IOError, "closed stream" if closed?
|
127
|
-
|
133
|
+
private def each_record_reuse
|
134
|
+
check_closed
|
135
|
+
# `each` does not always start at the beginning of the file.
|
136
|
+
# This is the common behavior of IO objects in Ruby.
|
128
137
|
return to_enum(__method__) unless block_given?
|
129
138
|
|
130
139
|
bcf1 = LibHTS.bcf_init
|
@@ -132,5 +141,76 @@ module HTS
|
|
132
141
|
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
133
142
|
self
|
134
143
|
end
|
144
|
+
|
145
|
+
# @!macro [attach] define_getter
|
146
|
+
# @method $1
|
147
|
+
# Get $1 array
|
148
|
+
# @return [Array] the $1 array
|
149
|
+
define_getter :chrom
|
150
|
+
define_getter :pos
|
151
|
+
define_getter :endpos
|
152
|
+
define_getter :id
|
153
|
+
define_getter :ref
|
154
|
+
define_getter :alt
|
155
|
+
define_getter :qual
|
156
|
+
define_getter :filter
|
157
|
+
|
158
|
+
# Collect the value of +key+ from every record yielded by #each (through
# Enumerable#map), then put the file position back where it was.
#
# @param key [Object, nil] forwarded unchanged to each record's #info
# @return [Array] one entry per record, in iteration order
# @raise [IOError] via check_closed when the file is closed
# @raise [NotImplementedError] when no key is given
def info(key = nil)
  check_closed
  # Fail before touching the file position; the keyless form is not supported.
  raise NotImplementedError, "info without a key is not implemented" unless key

  position = tell
  begin
    map { |r| r.info(key) }
  ensure
    # Restore the read position even if a record raises part-way through.
    seek(position)
  end
end
|
171
|
+
|
172
|
+
# Collect the value of +key+ from every record yielded by #each (through
# Enumerable#map), then put the file position back where it was.
#
# @param key [Object, nil] forwarded unchanged to each record's #format
# @return [Array] one entry per record, in iteration order
# @raise [IOError] via check_closed when the file is closed
# @raise [NotImplementedError] when no key is given
def format(key = nil)
  check_closed
  # Fail before touching the file position; the keyless form is not supported.
  raise NotImplementedError, "format without a key is not implemented" unless key

  position = tell
  begin
    map { |r| r.format(key) }
  ensure
    # Restore the read position even if a record raises part-way through.
    seek(position)
  end
end
|
185
|
+
|
186
|
+
# @!macro [attach] define_iterator
|
187
|
+
# @method each_$1
|
188
|
+
# Get $1 iterator
|
189
|
+
define_iterator :chrom
|
190
|
+
define_iterator :pos
|
191
|
+
define_iterator :endpos
|
192
|
+
define_iterator :id
|
193
|
+
define_iterator :ref
|
194
|
+
define_iterator :alt
|
195
|
+
define_iterator :qual
|
196
|
+
define_iterator :filter
|
197
|
+
|
198
|
+
# Iterate over the records from #each, yielding record.info(key) for each one.
#
# @param key [Object] forwarded unchanged to each record's #info
# @yield the value returned by record.info(key)
# @return [self] when a block is given
# @return [Enumerator] when no block is given
# @raise [IOError] via check_closed when the file is closed
def each_info(key)
  check_closed
  # A plain `def` has no `block` local, so use block_given?. Forward `key`
  # so the enumerator replays this call with the same argument.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.info(key)
  end
  self
end
|
206
|
+
|
207
|
+
# Iterate over the records from #each, yielding record.format(key) for each one.
#
# @param key [Object] forwarded unchanged to each record's #format
# @yield the value returned by record.format(key)
# @return [self] when a block is given
# @return [Enumerator] when no block is given
# @raise [IOError] via check_closed when the file is closed
def each_format(key)
  check_closed
  # A plain `def` has no `block` local, so use block_given?. Forward `key`
  # so the enumerator replays this call with the same argument.
  return to_enum(__method__, key) unless block_given?

  each do |r|
    yield r.format(key)
  end
  self
end
|
135
215
|
end
|
136
216
|
end
|
data/lib/hts/faidx.rb
CHANGED
@@ -6,22 +6,32 @@ module HTS
|
|
6
6
|
class Faidx
|
7
7
|
attr_reader :file_name
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
def self.open(*args, **kw)
|
10
|
+
file = new(*args, **kw) # do not yield
|
11
|
+
return file unless block_given?
|
12
|
+
|
13
|
+
begin
|
14
|
+
yield file
|
15
|
+
ensure
|
16
|
+
file.close
|
17
|
+
end
|
18
|
+
file
|
11
19
|
end
|
12
20
|
|
13
21
|
def initialize(file_name)
|
22
|
+
if block_given?
|
23
|
+
message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
|
24
|
+
raise message
|
25
|
+
end
|
26
|
+
|
14
27
|
@file_name = file_name
|
15
28
|
@fai = LibHTS.fai_load(@file_name)
|
16
29
|
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
close
|
23
|
-
end
|
24
|
-
end
|
30
|
+
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
31
|
+
end
|
32
|
+
|
33
|
+
def struct
|
34
|
+
@fai
|
25
35
|
end
|
26
36
|
|
27
37
|
def close
|
@@ -29,10 +39,10 @@ module HTS
|
|
29
39
|
end
|
30
40
|
|
31
41
|
# the number of sequences in the index.
|
32
|
-
def
|
42
|
+
def length
|
33
43
|
LibHTS.faidx_nseq(@fai)
|
34
44
|
end
|
35
|
-
alias length
|
45
|
+
alias size length
|
36
46
|
|
37
47
|
# return the length of the requested chromosome.
|
38
48
|
def chrom_size(chrom)
|
@@ -48,10 +58,10 @@ module HTS
|
|
48
58
|
alias chrom_length chrom_size
|
49
59
|
|
50
60
|
# FIXME: naming and syntax
|
51
|
-
def cget; end
|
61
|
+
# def cget; end
|
52
62
|
|
53
63
|
# FIXME: naming and syntax
|
54
|
-
def get; end
|
64
|
+
# def get; end
|
55
65
|
|
56
66
|
# __iter__
|
57
67
|
end
|
data/lib/hts/hts.rb
CHANGED
@@ -3,7 +3,38 @@
|
|
3
3
|
require_relative "../htslib"
|
4
4
|
|
5
5
|
module HTS
|
6
|
+
# A base class for hts files.
|
6
7
|
class Hts
|
8
|
+
class << self
|
9
|
+
private
|
10
|
+
|
11
|
+
def define_getter(name)
|
12
|
+
define_method(name) do
|
13
|
+
check_closed
|
14
|
+
position = tell
|
15
|
+
ary = map(&name)
|
16
|
+
seek(position)
|
17
|
+
ary
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def define_iterator(name)
|
22
|
+
define_method("each_#{name}") do |&block|
|
23
|
+
check_closed
|
24
|
+
return to_enum(__method__) unless block
|
25
|
+
|
26
|
+
each do |record|
|
27
|
+
block.call(record.public_send(name))
|
28
|
+
end
|
29
|
+
self
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def initialize(*args)
|
35
|
+
# do nothing
|
36
|
+
end
|
37
|
+
|
7
38
|
def struct
|
8
39
|
@hts_file
|
9
40
|
end
|
@@ -12,11 +43,11 @@ module HTS
|
|
12
43
|
@hts_file.to_ptr
|
13
44
|
end
|
14
45
|
|
15
|
-
def
|
46
|
+
def file_format
|
16
47
|
LibHTS.hts_get_format(@hts_file)[:format].to_s
|
17
48
|
end
|
18
49
|
|
19
|
-
def
|
50
|
+
def file_format_version
|
20
51
|
v = LibHTS.hts_get_format(@hts_file)[:version]
|
21
52
|
major = v[:major]
|
22
53
|
minor = v[:minor]
|
@@ -38,6 +69,16 @@ module HTS
|
|
38
69
|
@hts_file.nil? || @hts_file.null?
|
39
70
|
end
|
40
71
|
|
72
|
+
# Configure the thread count for this file handle through
# LibHTS.hts_set_threads.
#
# @param n [Integer] requested thread count; the library is only called when n > 0
# @return [self]
# @raise [TypeError] if n is not an Integer
# @raise [RuntimeError] if LibHTS.hts_set_threads returns a negative value
def set_threads(n)
  # The predicate is `is_a?`; the previous `is_a` raised NoMethodError.
  raise TypeError, "threads must be an Integer, got #{n.class}" unless n.is_a?(Integer)

  if n > 0
    r = LibHTS.hts_set_threads(@hts_file, n)
    # Report the actual argument; `threads` is not defined in this scope.
    raise "Failed to set number of threads: #{n}" if r < 0
  end
  self
end
|
81
|
+
|
41
82
|
def seek(offset)
|
42
83
|
if @hts_file[:is_cram] == 1
|
43
84
|
LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
|
@@ -70,5 +111,11 @@ module HTS
|
|
70
111
|
raise "Cannot rewind: no start position"
|
71
112
|
end
|
72
113
|
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
def check_closed
|
118
|
+
raise IOError, "closed stream" if closed?
|
119
|
+
end
|
73
120
|
end
|
74
121
|
end
|
data/lib/hts/libhts/bgzf.rb
CHANGED
@@ -18,19 +18,19 @@ module HTS
|
|
18
18
|
# Open an existing hFILE stream for reading or writing.
|
19
19
|
attach_function \
|
20
20
|
:bgzf_hopen,
|
21
|
-
[
|
21
|
+
[HFile, :string],
|
22
22
|
BGZF.by_ref
|
23
23
|
|
24
24
|
# Close the BGZF and free all associated resources.
|
25
25
|
attach_function \
|
26
26
|
:bgzf_close,
|
27
|
-
[
|
27
|
+
[HFile],
|
28
28
|
:int
|
29
29
|
|
30
30
|
# Read up to _length_ bytes from the file storing into _data_.
|
31
31
|
attach_function \
|
32
32
|
:bgzf_read,
|
33
|
-
[
|
33
|
+
[HFile, :pointer, :size_t],
|
34
34
|
:ssize_t
|
35
35
|
|
36
36
|
# Write _length_ bytes from _data_ to the file. If no I/O errors occur,
|
@@ -181,7 +181,7 @@ module HTS
|
|
181
181
|
# Load BGZF index from an hFILE
|
182
182
|
attach_function \
|
183
183
|
:bgzf_index_load_hfile,
|
184
|
-
[BGZF,
|
184
|
+
[BGZF, HFile, :string],
|
185
185
|
:int
|
186
186
|
|
187
187
|
# Save BGZF index
|
@@ -193,7 +193,7 @@ module HTS
|
|
193
193
|
# Write a BGZF index to an hFILE
|
194
194
|
attach_function \
|
195
195
|
:bgzf_index_dump_hfile,
|
196
|
-
[BGZF,
|
196
|
+
[BGZF, HFile, :string],
|
197
197
|
:int
|
198
198
|
end
|
199
199
|
end
|
data/lib/hts/libhts/constants.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
+
# Module for working with C HTSlib.
|
4
5
|
module LibHTS
|
5
6
|
typedef :int64, :hts_pos_t
|
6
7
|
typedef :pointer, :bam_plp_auto_f
|
@@ -24,9 +25,9 @@ module HTS
|
|
24
25
|
:f, :pointer # kstream_t
|
25
26
|
end
|
26
27
|
|
27
|
-
#
|
28
|
+
# HFile
|
28
29
|
|
29
|
-
class
|
30
|
+
class HFile < FFI::BitStruct
|
30
31
|
layout \
|
31
32
|
:buffer, :string,
|
32
33
|
:begin, :string,
|
@@ -56,7 +57,7 @@ module HTS
|
|
56
57
|
:uncompressed_block, :pointer,
|
57
58
|
:compressed_block, :pointer,
|
58
59
|
:cache, :pointer,
|
59
|
-
:fp,
|
60
|
+
:fp, HFile.ptr,
|
60
61
|
:mt, :pointer,
|
61
62
|
:idx, :pointer,
|
62
63
|
:idx_build_otf, :int,
|
@@ -189,6 +190,16 @@ module HTS
|
|
189
190
|
)
|
190
191
|
end
|
191
192
|
|
193
|
+
class HtsReglist < FFI::Struct
|
194
|
+
layout \
|
195
|
+
:reg, :string,
|
196
|
+
:intervals, :pointer, # hts_pair_pos_t
|
197
|
+
:tid, :int,
|
198
|
+
:count, :uint32_t,
|
199
|
+
:min_beg, :hts_pos_t,
|
200
|
+
:max_end, :hts_pos_t
|
201
|
+
end
|
202
|
+
|
192
203
|
# HtsFile
|
193
204
|
class SamHdr < FFI::Struct
|
194
205
|
layout \
|
@@ -217,7 +228,7 @@ module HTS
|
|
217
228
|
union_layout(
|
218
229
|
:bgzf, BGZF.ptr,
|
219
230
|
:cram, :pointer, # cram_fd
|
220
|
-
:hfile,
|
231
|
+
:hfile, HFile.ptr
|
221
232
|
),
|
222
233
|
:state, :pointer,
|
223
234
|
:format, HtsFormat,
|
@@ -263,7 +274,7 @@ module HTS
|
|
263
274
|
:n_reg, :int,
|
264
275
|
:beg, :int64,
|
265
276
|
:end, :int64,
|
266
|
-
:reg_list, :pointer,
|
277
|
+
:reg_list, :pointer, # HtsReglist.ptr,
|
267
278
|
:curr_tid, :int,
|
268
279
|
:curr_reg, :int,
|
269
280
|
:curr_intv, :int,
|
@@ -516,5 +527,15 @@ module HTS
|
|
516
527
|
LibHTS.bcf_destroy(ptr) unless ptr.null?
|
517
528
|
end
|
518
529
|
end
|
530
|
+
|
531
|
+
CramContentType = enum(
|
532
|
+
:ct_error, -1,
|
533
|
+
:file_header, 0,
|
534
|
+
:compression_header, 1,
|
535
|
+
:mapped_slice, 2,
|
536
|
+
:unmapped_slice, 3, # cram v1.0 only
|
537
|
+
:external, 4,
|
538
|
+
:core, 5
|
539
|
+
)
|
519
540
|
end
|
520
541
|
end
|