htslib 0.2.3 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +65 -47
- data/TUTORIAL.md +270 -0
- data/lib/hts/bam/auxi.rb +28 -2
- data/lib/hts/bam/cigar.rb +46 -6
- data/lib/hts/bam/flag.rb +43 -4
- data/lib/hts/bam/header.rb +53 -2
- data/lib/hts/bam/header_record.rb +11 -0
- data/lib/hts/bam/record.rb +66 -24
- data/lib/hts/bam.rb +88 -73
- data/lib/hts/bcf/format.rb +28 -24
- data/lib/hts/bcf/header.rb +79 -2
- data/lib/hts/bcf/header_record.rb +35 -1
- data/lib/hts/bcf/info.rb +28 -24
- data/lib/hts/bcf.rb +118 -98
- data/lib/hts/faidx/sequence.rb +64 -0
- data/lib/hts/faidx.rb +64 -15
- data/lib/hts/hts.rb +12 -9
- data/lib/hts/libhts/constants.rb +46 -14
- data/lib/hts/libhts/cram.rb +1 -1
- data/lib/hts/libhts/sam.rb +4 -4
- data/lib/hts/libhts/tbx.rb +2 -0
- data/lib/hts/libhts/tbx_funcs.rb +3 -1
- data/lib/hts/libhts/vcf.rb +1 -1
- data/lib/hts/libhts/vcf_funcs.rb +16 -8
- data/lib/hts/tbx.rb +50 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +1 -0
- metadata +6 -3
data/lib/hts/bcf/header.rb
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "header_record"
|
4
|
+
|
3
5
|
module HTS
|
4
6
|
class Bcf < Hts
|
5
7
|
# A class for working with VCF/BCF headers.
|
8
|
+
# NOTE: This class has a lot of methods that are not stable.
|
9
|
+
# The method names and the number of arguments may change in the future.
|
6
10
|
class Header
|
7
|
-
def initialize(
|
8
|
-
|
11
|
+
def initialize(arg = nil)
|
12
|
+
case arg
|
13
|
+
when LibHTS::HtsFile
|
14
|
+
@bcf_hdr = LibHTS.bcf_hdr_read(arg)
|
15
|
+
when LibHTS::BcfHdr
|
16
|
+
@bcf_hdr = arg
|
17
|
+
when nil
|
18
|
+
@bcf_hdr = LibHTS.bcf_hdr_init("w")
|
19
|
+
else
|
20
|
+
raise TypeError, "Invalid argument"
|
21
|
+
end
|
9
22
|
end
|
10
23
|
|
11
24
|
def struct
|
@@ -20,6 +33,10 @@ module HTS
|
|
20
33
|
LibHTS.bcf_hdr_get_version(@bcf_hdr)
|
21
34
|
end
|
22
35
|
|
36
|
+
def set_version(version)
|
37
|
+
LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
|
38
|
+
end
|
39
|
+
|
23
40
|
def nsamples
|
24
41
|
LibHTS.bcf_hdr_nsamples(@bcf_hdr)
|
25
42
|
end
|
@@ -31,6 +48,45 @@ module HTS
|
|
31
48
|
.map(&:read_string)
|
32
49
|
end
|
33
50
|
|
51
|
+
def add_sample(sample, sync: true)
|
52
|
+
LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
|
53
|
+
self.sync if sync
|
54
|
+
end
|
55
|
+
|
56
|
+
def merge(hdr)
|
57
|
+
LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
|
58
|
+
end
|
59
|
+
|
60
|
+
def sync
|
61
|
+
LibHTS.bcf_hdr_sync(@bcf_hdr)
|
62
|
+
end
|
63
|
+
|
64
|
+
def read_bcf(fname)
|
65
|
+
LibHTS.bcf_hdr_set(@bcf_hdr, fname)
|
66
|
+
end
|
67
|
+
|
68
|
+
def append(line)
|
69
|
+
LibHTS.bcf_hdr_append(@bcf_hdr, line)
|
70
|
+
end
|
71
|
+
|
72
|
+
def delete(bcf_hl_type, key) # FIXME
|
73
|
+
type = bcf_hl_type_to_int(bcf_hl_type)
|
74
|
+
LibHTS.bcf_hdr_remove(@bcf_hdr, type, key)
|
75
|
+
end
|
76
|
+
|
77
|
+
def get_hrec(bcf_hl_type, key, value, str_class = nil)
|
78
|
+
type = bcf_hl_type_to_int(bcf_hl_type)
|
79
|
+
hrec = LibHTS.bcf_hdr_get_hrec(@bcf_hdr, type, key, value, str_class)
|
80
|
+
HeaderRecord.new(hrec)
|
81
|
+
end
|
82
|
+
|
83
|
+
def seqnames
|
84
|
+
n = FFI::MemoryPointer.new(:int)
|
85
|
+
names = LibHTS.bcf_hdr_seqnames(@bcf_hdr, n)
|
86
|
+
names.read_array_of_pointer(n.read_int)
|
87
|
+
.map(&:read_string)
|
88
|
+
end
|
89
|
+
|
34
90
|
def to_s
|
35
91
|
kstr = LibHTS::KString.new
|
36
92
|
raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
|
@@ -40,6 +96,27 @@ module HTS
|
|
40
96
|
|
41
97
|
private
|
42
98
|
|
99
|
+
def bcf_hl_type_to_int(bcf_hl_type)
|
100
|
+
return bcf_hl_type if bcf_hl_type.is_a?(Integer)
|
101
|
+
|
102
|
+
case bcf_hl_type.to_s.upcase
|
103
|
+
when "FILTER", "FIL"
|
104
|
+
LibHTS::BCF_HL_FLT
|
105
|
+
when "INFO"
|
106
|
+
LibHTS::BCF_HL_INFO
|
107
|
+
when "FORMAT", "FMT"
|
108
|
+
LibHTS::BCF_HL_FMT
|
109
|
+
when "CONTIG", "CTG"
|
110
|
+
LibHTS::BCF_HL_CTG
|
111
|
+
when "STRUCTURED", "STR"
|
112
|
+
LibHTS::BCF_HL_STR
|
113
|
+
when "GENOTYPE", "GEN"
|
114
|
+
LibHTS::BCF_HL_GEN
|
115
|
+
else
|
116
|
+
raise TypeError, "Invalid argument"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
43
120
|
def initialize_copy(orig)
|
44
121
|
@bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
|
45
122
|
end
|
data/lib/hts/bcf/header_record.rb
CHANGED
@@ -3,9 +3,43 @@
|
|
3
3
|
module HTS
|
4
4
|
class Bcf < Hts
|
5
5
|
class HeaderRecord
|
6
|
-
def initialize
|
6
|
+
def initialize(arg = nil)
|
7
|
+
case arg
|
8
|
+
when LibHTS::BcfHrec
|
9
|
+
@bcf_hrec = arg
|
10
|
+
else
|
11
|
+
raise TypeError, "Invalid argument"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def struct
|
7
16
|
@bcf_hrec
|
8
17
|
end
|
18
|
+
|
19
|
+
def add_key(key)
|
20
|
+
LibHTS.bcf_hrec_add_key(@bcf_hrec, key, key.length)
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_value(i, val, quote: true)
|
24
|
+
is_quoted = quote ? 1 : 0
|
25
|
+
LibHTS.bcf_hrec_set_val(@bcf_hrec, i, val, val.length, is_quoted)
|
26
|
+
end
|
27
|
+
|
28
|
+
def find_key(key)
|
29
|
+
LibHTS.bcf_hrec_find_key(@bcf_hrec, key)
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_s
|
33
|
+
kstr = LibHTS::KString.new
|
34
|
+
LibHTS.bcf_hrec_format(@bcf_hrec, kstr)
|
35
|
+
kstr[:s]
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def initialize_copy(orig)
|
41
|
+
@bcf_hrec = LibHTS.bcf_hrec_dup(orig.struct)
|
42
|
+
end
|
9
43
|
end
|
10
44
|
end
|
11
45
|
end
|
data/lib/hts/bcf/info.rb
CHANGED
@@ -9,31 +9,11 @@ module HTS
|
|
9
9
|
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
10
10
|
end
|
11
11
|
|
12
|
-
# For compatibility with htslib.cr.
|
13
|
-
def get_int(key)
|
14
|
-
get(key, :int)
|
15
|
-
end
|
16
|
-
|
17
|
-
# For compatibility with htslib.cr.
|
18
|
-
def get_float(key)
|
19
|
-
get(key, :float)
|
20
|
-
end
|
21
|
-
|
22
|
-
# For compatibility with htslib.cr.
|
23
|
-
def get_string(key)
|
24
|
-
get(key, :string)
|
25
|
-
end
|
26
|
-
|
27
|
-
# For compatibility with htslib.cr.
|
28
|
-
def get_flag(key)
|
29
|
-
get(key, :flag)
|
30
|
-
end
|
31
|
-
|
32
|
-
def [](key)
|
33
|
-
get(key)
|
34
|
-
end
|
35
|
-
|
36
12
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
13
|
+
# @note: Why is this method named "get" instead of "fetch"?
|
14
|
+
# This is for compatibility with the Crystal language
|
15
|
+
# which provides methods like `get_int`, `get_float`, etc.
|
16
|
+
# I think they are better than `fetch_int` and `fetch_float`.
|
37
17
|
def get(key, type = nil)
|
38
18
|
n = FFI::MemoryPointer.new(:int)
|
39
19
|
p1 = @p1
|
@@ -70,6 +50,30 @@ module HTS
|
|
70
50
|
end
|
71
51
|
end
|
72
52
|
|
53
|
+
# For compatibility with HTS.cr.
|
54
|
+
def get_int(key)
|
55
|
+
get(key, :int)
|
56
|
+
end
|
57
|
+
|
58
|
+
# For compatibility with HTS.cr.
|
59
|
+
def get_float(key)
|
60
|
+
get(key, :float)
|
61
|
+
end
|
62
|
+
|
63
|
+
# For compatibility with HTS.cr.
|
64
|
+
def get_string(key)
|
65
|
+
get(key, :string)
|
66
|
+
end
|
67
|
+
|
68
|
+
# For compatibility with HTS.cr.
|
69
|
+
def get_flag(key)
|
70
|
+
get(key, :flag)
|
71
|
+
end
|
72
|
+
|
73
|
+
def [](key)
|
74
|
+
get(key)
|
75
|
+
end
|
76
|
+
|
73
77
|
# FIXME: naming? room for improvement.
|
74
78
|
def fields
|
75
79
|
keys.map do |key|
|
data/lib/hts/bcf.rb
CHANGED
@@ -52,10 +52,9 @@ module HTS
|
|
52
52
|
build_index(index) if build_index
|
53
53
|
@idx = load_index(index)
|
54
54
|
@start_position = tell
|
55
|
-
super # do nothing
|
56
55
|
end
|
57
56
|
|
58
|
-
def build_index(index_name = nil, min_shift: 14)
|
57
|
+
def build_index(index_name = nil, min_shift: 14, threads: 2)
|
59
58
|
check_closed
|
60
59
|
|
61
60
|
if index_name
|
@@ -63,10 +62,15 @@ module HTS
|
|
63
62
|
else
|
64
63
|
warn "Create index for #{@file_name}"
|
65
64
|
end
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
65
|
+
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
|
66
|
+
when 0 # successful
|
67
|
+
when -1 then raise "indexing failed"
|
68
|
+
when -2 then raise "opening #{@file_name} failed"
|
69
|
+
when -3 then raise "format not indexable"
|
70
|
+
when -4 then raise "failed to create and/or save the index"
|
71
|
+
else raise "unknown error"
|
72
|
+
end
|
73
|
+
self # for method chaining
|
70
74
|
end
|
71
75
|
|
72
76
|
def load_index(index_name = nil)
|
@@ -85,22 +89,34 @@ module HTS
|
|
85
89
|
!@idx.null?
|
86
90
|
end
|
87
91
|
|
88
|
-
def
|
92
|
+
def close
|
93
|
+
LibHTS.hts_idx_destroy(@idx) unless @idx&.null?
|
94
|
+
@idx = nil
|
95
|
+
super
|
96
|
+
end
|
97
|
+
|
98
|
+
def write_header(header)
|
89
99
|
check_closed
|
90
100
|
|
91
101
|
@header = header.dup
|
92
|
-
LibHTS.hts_set_fai_filename(header, @file_name)
|
93
102
|
LibHTS.bcf_hdr_write(@hts_file, header)
|
94
103
|
end
|
95
104
|
|
96
|
-
def
|
105
|
+
def header=(header)
|
106
|
+
write_header(header)
|
107
|
+
end
|
108
|
+
|
109
|
+
def write(record)
|
97
110
|
check_closed
|
98
111
|
|
99
|
-
|
100
|
-
LibHTS.bcf_write(@hts_file, header,
|
112
|
+
# record = record.dup
|
113
|
+
r = LibHTS.bcf_write(@hts_file, header, record)
|
114
|
+
raise "Failed to write record" if r < 0
|
101
115
|
end
|
102
116
|
|
103
|
-
|
117
|
+
def <<(var)
|
118
|
+
write(var)
|
119
|
+
end
|
104
120
|
|
105
121
|
def nsamples
|
106
122
|
check_closed
|
@@ -122,29 +138,6 @@ module HTS
|
|
122
138
|
end
|
123
139
|
end
|
124
140
|
|
125
|
-
private def each_record_copy
|
126
|
-
check_closed
|
127
|
-
|
128
|
-
return to_enum(__method__) unless block_given?
|
129
|
-
|
130
|
-
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
131
|
-
record = Record.new(bcf1, header)
|
132
|
-
yield record
|
133
|
-
end
|
134
|
-
self
|
135
|
-
end
|
136
|
-
|
137
|
-
private def each_record_reuse
|
138
|
-
check_closed
|
139
|
-
|
140
|
-
return to_enum(__method__) unless block_given?
|
141
|
-
|
142
|
-
bcf1 = LibHTS.bcf_init
|
143
|
-
record = Record.new(bcf1, header)
|
144
|
-
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
145
|
-
self
|
146
|
-
end
|
147
|
-
|
148
141
|
def query(...)
|
149
142
|
querys(...) # Fixme
|
150
143
|
end
|
@@ -166,55 +159,6 @@ module HTS
|
|
166
159
|
# private def queryi_reuse
|
167
160
|
# end
|
168
161
|
|
169
|
-
private def querys_copy(region)
|
170
|
-
check_closed
|
171
|
-
|
172
|
-
raise "query is only available for BCF files" unless file_format == "bcf"
|
173
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
174
|
-
return to_enum(__method__, region) unless block_given?
|
175
|
-
|
176
|
-
qitr = LibHTS.bcf_itr_querys(@idx, header, region)
|
177
|
-
|
178
|
-
begin
|
179
|
-
loop do
|
180
|
-
bcf1 = LibHTS.bcf_init
|
181
|
-
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
|
182
|
-
break if slen == -1
|
183
|
-
raise if slen < -1
|
184
|
-
|
185
|
-
yield Record.new(bcf1, header)
|
186
|
-
end
|
187
|
-
ensure
|
188
|
-
LibHTS.bcf_itr_destroy(qitr)
|
189
|
-
end
|
190
|
-
self
|
191
|
-
end
|
192
|
-
|
193
|
-
private def querys_reuse(region)
|
194
|
-
check_closed
|
195
|
-
|
196
|
-
raise "query is only available for BCF files" unless file_format == "bcf"
|
197
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
198
|
-
return to_enum(__method__, region) unless block_given?
|
199
|
-
|
200
|
-
qitr = LibHTS.bcf_itr_querys(@idx, header, region)
|
201
|
-
|
202
|
-
bcf1 = LibHTS.bcf_init
|
203
|
-
record = Record.new(bcf1, header)
|
204
|
-
begin
|
205
|
-
loop do
|
206
|
-
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
|
207
|
-
break if slen == -1
|
208
|
-
raise if slen < -1
|
209
|
-
|
210
|
-
yield record
|
211
|
-
end
|
212
|
-
ensure
|
213
|
-
LibHTS.bcf_itr_destroy(qitr)
|
214
|
-
end
|
215
|
-
self
|
216
|
-
end
|
217
|
-
|
218
162
|
# @!macro [attach] define_getter
|
219
163
|
# @method $1
|
220
164
|
# Get $1 array
|
@@ -231,13 +175,13 @@ module HTS
|
|
231
175
|
def info(key = nil)
|
232
176
|
check_closed
|
233
177
|
position = tell
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
178
|
+
raise NotImplementedError unless key
|
179
|
+
|
180
|
+
ary = map { |r| r.info(key) }
|
181
|
+
|
182
|
+
# ary = each_copy.map { |r| r.info }
|
183
|
+
# ary = map { |r| r.info.clone }
|
184
|
+
|
241
185
|
seek(position)
|
242
186
|
ary
|
243
187
|
end
|
@@ -245,13 +189,13 @@ module HTS
|
|
245
189
|
def format(key = nil)
|
246
190
|
check_closed
|
247
191
|
position = tell
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
192
|
+
raise NotImplementedError unless key
|
193
|
+
|
194
|
+
ary = map { |r| r.format(key) }
|
195
|
+
|
196
|
+
# ary = each_copy.map { |r| r.format }
|
197
|
+
# ary = map { |r| r.format.clone }
|
198
|
+
|
255
199
|
seek(position)
|
256
200
|
ary
|
257
201
|
end
|
@@ -285,5 +229,81 @@ module HTS
|
|
285
229
|
yield r.format(key)
|
286
230
|
end
|
287
231
|
end
|
232
|
+
|
233
|
+
private
|
234
|
+
|
235
|
+
def querys_reuse(region)
|
236
|
+
check_closed
|
237
|
+
|
238
|
+
raise "query is only available for BCF files" unless file_format == "bcf"
|
239
|
+
raise "Index file is required to call the query method." unless index_loaded?
|
240
|
+
return to_enum(__method__, region) unless block_given?
|
241
|
+
|
242
|
+
qiter = LibHTS.bcf_itr_querys(@idx, header, region)
|
243
|
+
raise "Failed to query region #{region}" if qiter.null?
|
244
|
+
|
245
|
+
bcf1 = LibHTS.bcf_init
|
246
|
+
record = Record.new(bcf1, header)
|
247
|
+
begin
|
248
|
+
loop do
|
249
|
+
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
250
|
+
break if slen == -1
|
251
|
+
raise if slen < -1
|
252
|
+
|
253
|
+
yield record
|
254
|
+
end
|
255
|
+
ensure
|
256
|
+
LibHTS.bcf_itr_destroy(qiter)
|
257
|
+
end
|
258
|
+
self
|
259
|
+
end
|
260
|
+
|
261
|
+
def querys_copy(region)
|
262
|
+
check_closed
|
263
|
+
|
264
|
+
raise "query is only available for BCF files" unless file_format == "bcf"
|
265
|
+
raise "Index file is required to call the query method." unless index_loaded?
|
266
|
+
return to_enum(__method__, region) unless block_given?
|
267
|
+
|
268
|
+
qiter = LibHTS.bcf_itr_querys(@idx, header, region)
|
269
|
+
raise "Failed to query region #{region}" if qiter.null?
|
270
|
+
|
271
|
+
begin
|
272
|
+
loop do
|
273
|
+
bcf1 = LibHTS.bcf_init
|
274
|
+
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
275
|
+
break if slen == -1
|
276
|
+
raise if slen < -1
|
277
|
+
|
278
|
+
yield Record.new(bcf1, header)
|
279
|
+
end
|
280
|
+
ensure
|
281
|
+
LibHTS.bcf_itr_destroy(qiter)
|
282
|
+
end
|
283
|
+
self
|
284
|
+
end
|
285
|
+
|
286
|
+
def each_record_reuse
|
287
|
+
check_closed
|
288
|
+
|
289
|
+
return to_enum(__method__) unless block_given?
|
290
|
+
|
291
|
+
bcf1 = LibHTS.bcf_init
|
292
|
+
record = Record.new(bcf1, header)
|
293
|
+
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
294
|
+
self
|
295
|
+
end
|
296
|
+
|
297
|
+
def each_record_copy
|
298
|
+
check_closed
|
299
|
+
|
300
|
+
return to_enum(__method__) unless block_given?
|
301
|
+
|
302
|
+
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
303
|
+
record = Record.new(bcf1, header)
|
304
|
+
yield record
|
305
|
+
end
|
306
|
+
self
|
307
|
+
end
|
288
308
|
end
|
289
309
|
end
|
data/lib/hts/faidx/sequence.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require_relative "../faidx"
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
class Faidx
|
5
|
+
class Sequence
|
6
|
+
attr_reader :name, :faidx
|
7
|
+
|
8
|
+
def initialize(faidx, name)
|
9
|
+
raise unless faidx.has_key?(name)
|
10
|
+
|
11
|
+
@faidx = faidx
|
12
|
+
@name = name
|
13
|
+
end
|
14
|
+
|
15
|
+
def length
|
16
|
+
faidx.seq_len(name)
|
17
|
+
end
|
18
|
+
alias size length
|
19
|
+
|
20
|
+
def seq(start = nil, stop = nil)
|
21
|
+
faidx.seq(name, start, stop)
|
22
|
+
end
|
23
|
+
|
24
|
+
def qual(start = nil, stop = nil)
|
25
|
+
faidx.qual(name, start, stop)
|
26
|
+
end
|
27
|
+
|
28
|
+
def [](arg)
|
29
|
+
case arg
|
30
|
+
when Integer
|
31
|
+
if arg >= 0
|
32
|
+
start = arg
|
33
|
+
stop = arg
|
34
|
+
else
|
35
|
+
start = length + arg
|
36
|
+
stop = length + arg
|
37
|
+
end
|
38
|
+
when Range
|
39
|
+
arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
|
40
|
+
arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
|
41
|
+
if arg.begin.nil?
|
42
|
+
if arg.end.nil?
|
43
|
+
start = nil
|
44
|
+
stop = nil
|
45
|
+
else
|
46
|
+
start = 0
|
47
|
+
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
48
|
+
end
|
49
|
+
elsif arg.end.nil?
|
50
|
+
# endless range: read through the last base
|
51
|
+
start = arg.begin
|
52
|
+
stop = length - 1
|
53
|
+
else
|
54
|
+
start = arg.begin
|
55
|
+
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
56
|
+
end
|
57
|
+
else
|
58
|
+
raise ArgumentError
|
59
|
+
end
|
60
|
+
seq(start, stop)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/hts/faidx.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "../htslib"
|
4
|
+
require_relative "faidx/sequence"
|
4
5
|
|
5
6
|
module HTS
|
6
7
|
class Faidx
|
@@ -25,7 +26,11 @@ module HTS
|
|
25
26
|
end
|
26
27
|
|
27
28
|
@file_name = file_name
|
28
|
-
@fai =
|
29
|
+
@fai = if [".fq", ".fastq"].include? File.extname(@file_name)
|
30
|
+
LibHTS.fai_load_format(@file_name, 2)
|
31
|
+
else
|
32
|
+
LibHTS.fai_load(@file_name)
|
33
|
+
end
|
29
34
|
|
30
35
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
31
36
|
end
|
@@ -38,10 +43,9 @@ module HTS
|
|
38
43
|
LibHTS.fai_destroy(@fai)
|
39
44
|
end
|
40
45
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
# end
|
46
|
+
def file_format
|
47
|
+
@fai[:format]
|
48
|
+
end
|
45
49
|
|
46
50
|
# the number of sequences in the index.
|
47
51
|
def length
|
@@ -50,31 +54,48 @@ module HTS
|
|
50
54
|
alias size length
|
51
55
|
|
52
56
|
# return the names of the sequences in the index.
|
53
|
-
def
|
57
|
+
def names
|
58
|
+
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
59
|
+
end
|
60
|
+
|
61
|
+
alias keys names
|
62
|
+
|
63
|
+
def has_key?(key)
|
64
|
+
raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
|
65
|
+
|
66
|
+
key = key.to_s
|
67
|
+
case LibHTS.faidx_has_seq(@fai, key)
|
68
|
+
when 1 then true
|
69
|
+
when 0 then false
|
70
|
+
else raise
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def [](name)
|
75
|
+
name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
|
76
|
+
Sequence.new(self, name)
|
77
|
+
end
|
78
|
+
|
79
|
+
# return the length of the requested chromosome.
|
80
|
+
def seq_len(chrom)
|
54
81
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
55
82
|
|
56
83
|
chrom = chrom.to_s
|
57
84
|
result = LibHTS.faidx_seq_len(@fai, chrom)
|
58
85
|
result == -1 ? nil : result
|
59
86
|
end
|
60
|
-
alias chrom_length chrom_size
|
61
87
|
|
62
|
-
#
|
63
|
-
def chrom_names
|
64
|
-
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
65
|
-
end
|
66
|
-
|
67
|
-
# @overload fetch(name)
|
88
|
+
# @overload seq(name)
|
68
89
|
# Fetch the sequence as a String.
|
69
90
|
# @param name [String] chr1:0-10
|
70
|
-
# @overload
|
91
|
+
# @overload seq(name, start, stop)
|
71
92
|
# Fetch the sequence as a String.
|
72
93
|
# @param name [String] the name of the chromosome
|
73
94
|
# @param start [Integer] the start position of the sequence (0-based)
|
74
95
|
# @param stop [Integer] the end position of the sequence (0-based)
|
75
96
|
# @return [String] the sequence
|
76
97
|
|
77
|
-
def
|
98
|
+
def fetch_seq(name, start = nil, stop = nil)
|
78
99
|
name = name.to_s
|
79
100
|
rlen = FFI::MemoryPointer.new(:int)
|
80
101
|
|
@@ -84,6 +105,7 @@ module HTS
|
|
84
105
|
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
85
106
|
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
86
107
|
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
108
|
+
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
87
109
|
|
88
110
|
result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
|
89
111
|
end
|
@@ -95,5 +117,32 @@ module HTS
|
|
95
117
|
|
96
118
|
result
|
97
119
|
end
|
120
|
+
|
121
|
+
alias seq fetch_seq
|
122
|
+
|
123
|
+
def fetch_qual(name, start = nil, stop = nil)
|
124
|
+
name = name.to_s
|
125
|
+
rlen = FFI::MemoryPointer.new(:int)
|
126
|
+
|
127
|
+
if start.nil? && stop.nil?
|
128
|
+
result = LibHTS.fai_fetchqual(@fai, name, rlen)
|
129
|
+
else
|
130
|
+
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
131
|
+
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
132
|
+
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
133
|
+
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
134
|
+
|
135
|
+
result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
|
136
|
+
end
|
137
|
+
|
138
|
+
case rlen.read_int
|
139
|
+
when -2 then raise "Invalid chromosome name: #{name}"
|
140
|
+
when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
|
141
|
+
end
|
142
|
+
|
143
|
+
result
|
144
|
+
end
|
145
|
+
|
146
|
+
alias qual fetch_qual
|
98
147
|
end
|
99
148
|
end
|