htslib 0.2.3 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +65 -47
- data/TUTORIAL.md +270 -0
- data/lib/hts/bam/auxi.rb +28 -2
- data/lib/hts/bam/cigar.rb +46 -6
- data/lib/hts/bam/flag.rb +43 -4
- data/lib/hts/bam/header.rb +53 -2
- data/lib/hts/bam/header_record.rb +11 -0
- data/lib/hts/bam/record.rb +66 -24
- data/lib/hts/bam.rb +88 -73
- data/lib/hts/bcf/format.rb +28 -24
- data/lib/hts/bcf/header.rb +79 -2
- data/lib/hts/bcf/header_record.rb +35 -1
- data/lib/hts/bcf/info.rb +28 -24
- data/lib/hts/bcf.rb +118 -98
- data/lib/hts/faidx/sequence.rb +64 -0
- data/lib/hts/faidx.rb +64 -15
- data/lib/hts/hts.rb +12 -9
- data/lib/hts/libhts/constants.rb +46 -14
- data/lib/hts/libhts/cram.rb +1 -1
- data/lib/hts/libhts/sam.rb +4 -4
- data/lib/hts/libhts/tbx.rb +2 -0
- data/lib/hts/libhts/tbx_funcs.rb +3 -1
- data/lib/hts/libhts/vcf.rb +1 -1
- data/lib/hts/libhts/vcf_funcs.rb +16 -8
- data/lib/hts/tbx.rb +50 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +1 -0
- metadata +6 -3
data/lib/hts/bcf/header.rb
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "header_record"
|
4
|
+
|
3
5
|
module HTS
|
4
6
|
class Bcf < Hts
|
5
7
|
# A class for working with VCF/BCF headers.
|
8
|
+
# NOTE: This class has a lot of methods that are not stable.
|
9
|
+
# The method names and the number of arguments may change in the future.
|
6
10
|
class Header
|
7
|
-
def initialize(
|
8
|
-
|
11
|
+
def initialize(arg = nil)
|
12
|
+
case arg
|
13
|
+
when LibHTS::HtsFile
|
14
|
+
@bcf_hdr = LibHTS.bcf_hdr_read(arg)
|
15
|
+
when LibHTS::BcfHdr
|
16
|
+
@bcf_hdr = arg
|
17
|
+
when nil
|
18
|
+
@bcf_hdr = LibHTS.bcf_hdr_init("w")
|
19
|
+
else
|
20
|
+
raise TypeError, "Invalid argument"
|
21
|
+
end
|
9
22
|
end
|
10
23
|
|
11
24
|
def struct
|
@@ -20,6 +33,10 @@ module HTS
|
|
20
33
|
LibHTS.bcf_hdr_get_version(@bcf_hdr)
|
21
34
|
end
|
22
35
|
|
36
|
+
def set_version(version)
|
37
|
+
LibHTS.bcf_hdr_set_version(@bcf_hdr, version)
|
38
|
+
end
|
39
|
+
|
23
40
|
def nsamples
|
24
41
|
LibHTS.bcf_hdr_nsamples(@bcf_hdr)
|
25
42
|
end
|
@@ -31,6 +48,45 @@ module HTS
|
|
31
48
|
.map(&:read_string)
|
32
49
|
end
|
33
50
|
|
51
|
+
def add_sample(sample, sync: true)
|
52
|
+
LibHTS.bcf_hdr_add_sample(@bcf_hdr, sample)
|
53
|
+
self.sync if sync
|
54
|
+
end
|
55
|
+
|
56
|
+
def merge(hdr)
|
57
|
+
LibHTS.bcf_hdr_merge(@bcf_hdr, hdr.struct)
|
58
|
+
end
|
59
|
+
|
60
|
+
def sync
|
61
|
+
LibHTS.bcf_hdr_sync(@bcf_hdr)
|
62
|
+
end
|
63
|
+
|
64
|
+
def read_bcf(fname)
|
65
|
+
LibHTS.bcf_hdr_set(@bcf_hdr, fname)
|
66
|
+
end
|
67
|
+
|
68
|
+
def append(line)
|
69
|
+
LibHTS.bcf_hdr_append(@bcf_hdr, line)
|
70
|
+
end
|
71
|
+
|
72
|
+
def delete(bcf_hl_type, key) # FIXME
|
73
|
+
type = bcf_hl_type_to_int(bcf_hl_type)
|
74
|
+
LibHTS.bcf_hdr_remove(@bcf_hdr, type, key)
|
75
|
+
end
|
76
|
+
|
77
|
+
def get_hrec(bcf_hl_type, key, value, str_class = nil)
|
78
|
+
type = bcf_hl_type_to_int(bcf_hl_type)
|
79
|
+
hrec = LibHTS.bcf_hdr_get_hrec(@bcf_hdr, type, key, value, str_class)
|
80
|
+
HeaderRecord.new(hrec)
|
81
|
+
end
|
82
|
+
|
83
|
+
def seqnames
|
84
|
+
n = FFI::MemoryPointer.new(:int)
|
85
|
+
names = LibHTS.bcf_hdr_seqnames(@bcf_hdr, n)
|
86
|
+
names.read_array_of_pointer(n.read_int)
|
87
|
+
.map(&:read_string)
|
88
|
+
end
|
89
|
+
|
34
90
|
def to_s
|
35
91
|
kstr = LibHTS::KString.new
|
36
92
|
raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
|
@@ -40,6 +96,27 @@ module HTS
|
|
40
96
|
|
41
97
|
private
|
42
98
|
|
99
|
+
def bcf_hl_type_to_int(bcf_hl_type)
|
100
|
+
return bcf_hl_type if bcf_hl_type.is_a?(Integer)
|
101
|
+
|
102
|
+
case bcf_hl_type.to_s.upcase
|
103
|
+
when "FILTER", "FIL"
|
104
|
+
LibHTS::BCF_HL_FLT
|
105
|
+
when "INFO"
|
106
|
+
LibHTS::BCF_HL_INFO
|
107
|
+
when "FORMAT", "FMT"
|
108
|
+
LibHTS::BCF_HL_FMT
|
109
|
+
when "CONTIG", "CTG"
|
110
|
+
LibHTS::BCF_HL_CTG
|
111
|
+
when "STRUCTURED", "STR"
|
112
|
+
LibHTS::BCF_HL_STR
|
113
|
+
when "GENOTYPE", "GEN"
|
114
|
+
LibHTS::BCF_HL_GEN
|
115
|
+
else
|
116
|
+
raise TypeError, "Invalid argument"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
43
120
|
def initialize_copy(orig)
|
44
121
|
@bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
|
45
122
|
end
|
@@ -3,9 +3,43 @@
|
|
3
3
|
module HTS
|
4
4
|
class Bcf < Hts
|
5
5
|
class HeaderRecord
|
6
|
-
def initialize
|
6
|
+
def initialize(arg = nil)
|
7
|
+
case arg
|
8
|
+
when LibHTS::BcfHrec
|
9
|
+
@bcf_hrec = arg
|
10
|
+
else
|
11
|
+
raise TypeError, "Invalid argument"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def struct
|
7
16
|
@bcf_hrec
|
8
17
|
end
|
18
|
+
|
19
|
+
def add_key(key)
|
20
|
+
LibHTS.bcf_hrec_add_key(@bcf_hrec, key, key.length)
|
21
|
+
end
|
22
|
+
|
23
|
+
def set_value(i, val, quote: true)
|
24
|
+
is_quoted = quote ? 1 : 0
|
25
|
+
LibHTS.bcf_hrec_set_val(@bcf_hrec, i, val, val.length, is_quoted)
|
26
|
+
end
|
27
|
+
|
28
|
+
def find_key(key)
|
29
|
+
LibHTS.bcf_hrec_find_key(@bcf_hrec, key)
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_s
|
33
|
+
kstr = LibHTS::KString.new
|
34
|
+
LibHTS.bcf_hrec_format(@bcf_hrec, kstr)
|
35
|
+
kstr[:s]
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def initialize_copy(orig)
|
41
|
+
@bcf_hrec = LibHTS.bcf_hrec_dup(orig.struct)
|
42
|
+
end
|
9
43
|
end
|
10
44
|
end
|
11
45
|
end
|
data/lib/hts/bcf/info.rb
CHANGED
@@ -9,31 +9,11 @@ module HTS
|
|
9
9
|
@p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
|
10
10
|
end
|
11
11
|
|
12
|
-
# For compatibility with htslib.cr.
|
13
|
-
def get_int(key)
|
14
|
-
get(key, :int)
|
15
|
-
end
|
16
|
-
|
17
|
-
# For compatibility with htslib.cr.
|
18
|
-
def get_float(key)
|
19
|
-
get(key, :float)
|
20
|
-
end
|
21
|
-
|
22
|
-
# For compatibility with htslib.cr.
|
23
|
-
def get_string(key)
|
24
|
-
get(key, :string)
|
25
|
-
end
|
26
|
-
|
27
|
-
# For compatibility with htslib.cr.
|
28
|
-
def get_flag(key)
|
29
|
-
get(key, :flag)
|
30
|
-
end
|
31
|
-
|
32
|
-
def [](key)
|
33
|
-
get(key)
|
34
|
-
end
|
35
|
-
|
36
12
|
# @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
|
13
|
+
# @note: Why is this method named "get" instead of "fetch"?
|
14
|
+
# This is for compatibility with the Crystal language
|
15
|
+
# which provides methods like `get_int`, `get_float`, etc.
|
16
|
+
# I think they are better than `fetch_int` and `fetch_float`.
|
37
17
|
def get(key, type = nil)
|
38
18
|
n = FFI::MemoryPointer.new(:int)
|
39
19
|
p1 = @p1
|
@@ -70,6 +50,30 @@ module HTS
|
|
70
50
|
end
|
71
51
|
end
|
72
52
|
|
53
|
+
# For compatibility with HTS.cr.
|
54
|
+
def get_int(key)
|
55
|
+
get(key, :int)
|
56
|
+
end
|
57
|
+
|
58
|
+
# For compatibility with HTS.cr.
|
59
|
+
def get_float(key)
|
60
|
+
get(key, :float)
|
61
|
+
end
|
62
|
+
|
63
|
+
# For compatibility with HTS.cr.
|
64
|
+
def get_string(key)
|
65
|
+
get(key, :string)
|
66
|
+
end
|
67
|
+
|
68
|
+
# For compatibility with HTS.cr.
|
69
|
+
def get_flag(key)
|
70
|
+
get(key, :flag)
|
71
|
+
end
|
72
|
+
|
73
|
+
def [](key)
|
74
|
+
get(key)
|
75
|
+
end
|
76
|
+
|
73
77
|
# FIXME: naming? room for improvement.
|
74
78
|
def fields
|
75
79
|
keys.map do |key|
|
data/lib/hts/bcf.rb
CHANGED
@@ -52,10 +52,9 @@ module HTS
|
|
52
52
|
build_index(index) if build_index
|
53
53
|
@idx = load_index(index)
|
54
54
|
@start_position = tell
|
55
|
-
super # do nothing
|
56
55
|
end
|
57
56
|
|
58
|
-
def build_index(index_name = nil, min_shift: 14)
|
57
|
+
def build_index(index_name = nil, min_shift: 14, threads: 2)
|
59
58
|
check_closed
|
60
59
|
|
61
60
|
if index_name
|
@@ -63,10 +62,15 @@ module HTS
|
|
63
62
|
else
|
64
63
|
warn "Create index for #{@file_name}"
|
65
64
|
end
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
65
|
+
case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
|
66
|
+
when 0 # successful
|
67
|
+
when -1 then raise "indexing failed"
|
68
|
+
when -2 then raise "opening #{@file_name} failed"
|
69
|
+
when -3 then raise "format not indexable"
|
70
|
+
when -4 then raise "failed to create and/or save the index"
|
71
|
+
else raise "unknown error"
|
72
|
+
end
|
73
|
+
self # for method chaining
|
70
74
|
end
|
71
75
|
|
72
76
|
def load_index(index_name = nil)
|
@@ -85,22 +89,34 @@ module HTS
|
|
85
89
|
!@idx.null?
|
86
90
|
end
|
87
91
|
|
88
|
-
def
|
92
|
+
def close
|
93
|
+
LibHTS.hts_idx_destroy(@idx) unless @idx&.null?
|
94
|
+
@idx = nil
|
95
|
+
super
|
96
|
+
end
|
97
|
+
|
98
|
+
def write_header(header)
|
89
99
|
check_closed
|
90
100
|
|
91
101
|
@header = header.dup
|
92
|
-
LibHTS.hts_set_fai_filename(header, @file_name)
|
93
102
|
LibHTS.bcf_hdr_write(@hts_file, header)
|
94
103
|
end
|
95
104
|
|
96
|
-
def
|
105
|
+
def header=(header)
|
106
|
+
write_header(header)
|
107
|
+
end
|
108
|
+
|
109
|
+
def write(record)
|
97
110
|
check_closed
|
98
111
|
|
99
|
-
|
100
|
-
LibHTS.bcf_write(@hts_file, header,
|
112
|
+
# record = record.dup
|
113
|
+
r = LibHTS.bcf_write(@hts_file, header, record)
|
114
|
+
raise "Failed to write record" if r < 0
|
101
115
|
end
|
102
116
|
|
103
|
-
|
117
|
+
def <<(var)
|
118
|
+
write(var)
|
119
|
+
end
|
104
120
|
|
105
121
|
def nsamples
|
106
122
|
check_closed
|
@@ -122,29 +138,6 @@ module HTS
|
|
122
138
|
end
|
123
139
|
end
|
124
140
|
|
125
|
-
private def each_record_copy
|
126
|
-
check_closed
|
127
|
-
|
128
|
-
return to_enum(__method__) unless block_given?
|
129
|
-
|
130
|
-
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
131
|
-
record = Record.new(bcf1, header)
|
132
|
-
yield record
|
133
|
-
end
|
134
|
-
self
|
135
|
-
end
|
136
|
-
|
137
|
-
private def each_record_reuse
|
138
|
-
check_closed
|
139
|
-
|
140
|
-
return to_enum(__method__) unless block_given?
|
141
|
-
|
142
|
-
bcf1 = LibHTS.bcf_init
|
143
|
-
record = Record.new(bcf1, header)
|
144
|
-
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
145
|
-
self
|
146
|
-
end
|
147
|
-
|
148
141
|
def query(...)
|
149
142
|
querys(...) # Fixme
|
150
143
|
end
|
@@ -166,55 +159,6 @@ module HTS
|
|
166
159
|
# private def queryi_reuse
|
167
160
|
# end
|
168
161
|
|
169
|
-
private def querys_copy(region)
|
170
|
-
check_closed
|
171
|
-
|
172
|
-
raise "query is only available for BCF files" unless file_format == "bcf"
|
173
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
174
|
-
return to_enum(__method__, region) unless block_given?
|
175
|
-
|
176
|
-
qitr = LibHTS.bcf_itr_querys(@idx, header, region)
|
177
|
-
|
178
|
-
begin
|
179
|
-
loop do
|
180
|
-
bcf1 = LibHTS.bcf_init
|
181
|
-
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
|
182
|
-
break if slen == -1
|
183
|
-
raise if slen < -1
|
184
|
-
|
185
|
-
yield Record.new(bcf1, header)
|
186
|
-
end
|
187
|
-
ensure
|
188
|
-
LibHTS.bcf_itr_destroy(qitr)
|
189
|
-
end
|
190
|
-
self
|
191
|
-
end
|
192
|
-
|
193
|
-
private def querys_reuse(region)
|
194
|
-
check_closed
|
195
|
-
|
196
|
-
raise "query is only available for BCF files" unless file_format == "bcf"
|
197
|
-
raise "Index file is required to call the query method." unless index_loaded?
|
198
|
-
return to_enum(__method__, region) unless block_given?
|
199
|
-
|
200
|
-
qitr = LibHTS.bcf_itr_querys(@idx, header, region)
|
201
|
-
|
202
|
-
bcf1 = LibHTS.bcf_init
|
203
|
-
record = Record.new(bcf1, header)
|
204
|
-
begin
|
205
|
-
loop do
|
206
|
-
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qitr, bcf1, ::FFI::Pointer::NULL)
|
207
|
-
break if slen == -1
|
208
|
-
raise if slen < -1
|
209
|
-
|
210
|
-
yield record
|
211
|
-
end
|
212
|
-
ensure
|
213
|
-
LibHTS.bcf_itr_destroy(qitr)
|
214
|
-
end
|
215
|
-
self
|
216
|
-
end
|
217
|
-
|
218
162
|
# @!macro [attach] define_getter
|
219
163
|
# @method $1
|
220
164
|
# Get $1 array
|
@@ -231,13 +175,13 @@ module HTS
|
|
231
175
|
def info(key = nil)
|
232
176
|
check_closed
|
233
177
|
position = tell
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
178
|
+
raise NotImplementedError unless key
|
179
|
+
|
180
|
+
ary = map { |r| r.info(key) }
|
181
|
+
|
182
|
+
# ary = each_copy.map { |r| r.info }
|
183
|
+
# ary = map { |r| r.info.clone }
|
184
|
+
|
241
185
|
seek(position)
|
242
186
|
ary
|
243
187
|
end
|
@@ -245,13 +189,13 @@ module HTS
|
|
245
189
|
def format(key = nil)
|
246
190
|
check_closed
|
247
191
|
position = tell
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
192
|
+
raise NotImplementedError unless key
|
193
|
+
|
194
|
+
ary = map { |r| r.format(key) }
|
195
|
+
|
196
|
+
# ary = each_copy.map { |r| r.format }
|
197
|
+
# ary = map { |r| r.format.clone }
|
198
|
+
|
255
199
|
seek(position)
|
256
200
|
ary
|
257
201
|
end
|
@@ -285,5 +229,81 @@ module HTS
|
|
285
229
|
yield r.format(key)
|
286
230
|
end
|
287
231
|
end
|
232
|
+
|
233
|
+
private
|
234
|
+
|
235
|
+
def querys_reuse(region)
|
236
|
+
check_closed
|
237
|
+
|
238
|
+
raise "query is only available for BCF files" unless file_format == "bcf"
|
239
|
+
raise "Index file is required to call the query method." unless index_loaded?
|
240
|
+
return to_enum(__method__, region) unless block_given?
|
241
|
+
|
242
|
+
qiter = LibHTS.bcf_itr_querys(@idx, header, region)
|
243
|
+
raise "Failed to query region #{region}" if qiter.null?
|
244
|
+
|
245
|
+
bcf1 = LibHTS.bcf_init
|
246
|
+
record = Record.new(bcf1, header)
|
247
|
+
begin
|
248
|
+
loop do
|
249
|
+
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
250
|
+
break if slen == -1
|
251
|
+
raise if slen < -1
|
252
|
+
|
253
|
+
yield record
|
254
|
+
end
|
255
|
+
ensure
|
256
|
+
LibHTS.bcf_itr_destroy(qiter)
|
257
|
+
end
|
258
|
+
self
|
259
|
+
end
|
260
|
+
|
261
|
+
def querys_copy(region)
|
262
|
+
check_closed
|
263
|
+
|
264
|
+
raise "query is only available for BCF files" unless file_format == "bcf"
|
265
|
+
raise "Index file is required to call the query method." unless index_loaded?
|
266
|
+
return to_enum(__method__, region) unless block_given?
|
267
|
+
|
268
|
+
qiter = LibHTS.bcf_itr_querys(@idx, header, region)
|
269
|
+
raise "Failed to query region #{region}" if qiter.null?
|
270
|
+
|
271
|
+
begin
|
272
|
+
loop do
|
273
|
+
bcf1 = LibHTS.bcf_init
|
274
|
+
slen = LibHTS.hts_itr_next(@hts_file[:fp][:bgzf], qiter, bcf1, ::FFI::Pointer::NULL)
|
275
|
+
break if slen == -1
|
276
|
+
raise if slen < -1
|
277
|
+
|
278
|
+
yield Record.new(bcf1, header)
|
279
|
+
end
|
280
|
+
ensure
|
281
|
+
LibHTS.bcf_itr_destroy(qiter)
|
282
|
+
end
|
283
|
+
self
|
284
|
+
end
|
285
|
+
|
286
|
+
def each_record_reuse
|
287
|
+
check_closed
|
288
|
+
|
289
|
+
return to_enum(__method__) unless block_given?
|
290
|
+
|
291
|
+
bcf1 = LibHTS.bcf_init
|
292
|
+
record = Record.new(bcf1, header)
|
293
|
+
yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
|
294
|
+
self
|
295
|
+
end
|
296
|
+
|
297
|
+
def each_record_copy
|
298
|
+
check_closed
|
299
|
+
|
300
|
+
return to_enum(__method__) unless block_given?
|
301
|
+
|
302
|
+
while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
|
303
|
+
record = Record.new(bcf1, header)
|
304
|
+
yield record
|
305
|
+
end
|
306
|
+
self
|
307
|
+
end
|
288
308
|
end
|
289
309
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require_relative "../faidx"
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
class Faidx
|
5
|
+
class Sequence
|
6
|
+
attr_reader :name, :faidx
|
7
|
+
|
8
|
+
def initialize(faidx, name)
|
9
|
+
raise unless faidx.has_key?(name)
|
10
|
+
|
11
|
+
@faidx = faidx
|
12
|
+
@name = name
|
13
|
+
end
|
14
|
+
|
15
|
+
def length
|
16
|
+
faidx.seq_len(name)
|
17
|
+
end
|
18
|
+
alias size length
|
19
|
+
|
20
|
+
def seq(start = nil, stop = nil)
|
21
|
+
faidx.seq(name, start, stop)
|
22
|
+
end
|
23
|
+
|
24
|
+
def qual(start = nil, stop = nil)
|
25
|
+
faidx.qual(name, start, stop)
|
26
|
+
end
|
27
|
+
|
28
|
+
def [](arg)
|
29
|
+
case arg
|
30
|
+
when Integer
|
31
|
+
if arg >= 0
|
32
|
+
start = arg
|
33
|
+
stop = arg
|
34
|
+
else
|
35
|
+
start = length + arg
|
36
|
+
stop = length + arg
|
37
|
+
end
|
38
|
+
when Range
|
39
|
+
arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
|
40
|
+
arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
|
41
|
+
if arg.begin.nil?
|
42
|
+
if arg.end.nil?
|
43
|
+
start = nil
|
44
|
+
stop = nil
|
45
|
+
else
|
46
|
+
start = 0
|
47
|
+
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
48
|
+
end
|
49
|
+
elsif arg.end.nil?
|
50
|
+
# always include the first base
|
51
|
+
start = arg.begin
|
52
|
+
stop = length - 1
|
53
|
+
else
|
54
|
+
start = arg.begin
|
55
|
+
stop = arg.exclude_end? ? arg.end - 1 : arg.end
|
56
|
+
end
|
57
|
+
else
|
58
|
+
raise ArgumentError
|
59
|
+
end
|
60
|
+
seq(start, stop)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/hts/faidx.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "../htslib"
|
4
|
+
require_relative "faidx/sequence"
|
4
5
|
|
5
6
|
module HTS
|
6
7
|
class Faidx
|
@@ -25,7 +26,11 @@ module HTS
|
|
25
26
|
end
|
26
27
|
|
27
28
|
@file_name = file_name
|
28
|
-
@fai =
|
29
|
+
@fai = if [".fq", ".fastq"].include? File.extname(@file_name)
|
30
|
+
LibHTS.fai_load_format(@file_name, 2)
|
31
|
+
else
|
32
|
+
LibHTS.fai_load(@file_name)
|
33
|
+
end
|
29
34
|
|
30
35
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
|
31
36
|
end
|
@@ -38,10 +43,9 @@ module HTS
|
|
38
43
|
LibHTS.fai_destroy(@fai)
|
39
44
|
end
|
40
45
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
# end
|
46
|
+
def file_format
|
47
|
+
@fai[:format]
|
48
|
+
end
|
45
49
|
|
46
50
|
# the number of sequences in the index.
|
47
51
|
def length
|
@@ -50,31 +54,48 @@ module HTS
|
|
50
54
|
alias size length
|
51
55
|
|
52
56
|
# return the names of the sequences in the index.
|
53
|
-
def
|
57
|
+
def names
|
58
|
+
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
59
|
+
end
|
60
|
+
|
61
|
+
alias keys names
|
62
|
+
|
63
|
+
def has_key?(key)
|
64
|
+
raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
|
65
|
+
|
66
|
+
key = key.to_s
|
67
|
+
case LibHTS.faidx_has_seq(@fai, key)
|
68
|
+
when 1 then true
|
69
|
+
when 0 then false
|
70
|
+
else raise
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def [](name)
|
75
|
+
name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
|
76
|
+
Sequence.new(self, name)
|
77
|
+
end
|
78
|
+
|
79
|
+
# return the length of the requested chromosome.
|
80
|
+
def seq_len(chrom)
|
54
81
|
raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
55
82
|
|
56
83
|
chrom = chrom.to_s
|
57
84
|
result = LibHTS.faidx_seq_len(@fai, chrom)
|
58
85
|
result == -1 ? nil : result
|
59
86
|
end
|
60
|
-
alias chrom_length chrom_size
|
61
87
|
|
62
|
-
#
|
63
|
-
def chrom_names
|
64
|
-
Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
|
65
|
-
end
|
66
|
-
|
67
|
-
# @overload fetch(name)
|
88
|
+
# @overload seq(name)
|
68
89
|
# Fetch the sequence as a String.
|
69
90
|
# @param name [String] chr1:0-10
|
70
|
-
# @overload
|
91
|
+
# @overload seq(name, start, stop)
|
71
92
|
# Fetch the sequence as a String.
|
72
93
|
# @param name [String] the name of the chromosome
|
73
94
|
# @param start [Integer] the start position of the sequence (0-based)
|
74
95
|
# @param stop [Integer] the end position of the sequence (0-based)
|
75
96
|
# @return [String] the sequence
|
76
97
|
|
77
|
-
def
|
98
|
+
def fetch_seq(name, start = nil, stop = nil)
|
78
99
|
name = name.to_s
|
79
100
|
rlen = FFI::MemoryPointer.new(:int)
|
80
101
|
|
@@ -84,6 +105,7 @@ module HTS
|
|
84
105
|
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
85
106
|
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
86
107
|
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
108
|
+
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
87
109
|
|
88
110
|
result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
|
89
111
|
end
|
@@ -95,5 +117,32 @@ module HTS
|
|
95
117
|
|
96
118
|
result
|
97
119
|
end
|
120
|
+
|
121
|
+
alias seq fetch_seq
|
122
|
+
|
123
|
+
def fetch_qual(name, start = nil, stop = nil)
|
124
|
+
name = name.to_s
|
125
|
+
rlen = FFI::MemoryPointer.new(:int)
|
126
|
+
|
127
|
+
if start.nil? && stop.nil?
|
128
|
+
result = LibHTS.fai_fetchqual(@fai, name, rlen)
|
129
|
+
else
|
130
|
+
start < 0 && raise(ArgumentError, "Expect start to be >= 0")
|
131
|
+
stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
|
132
|
+
start > stop && raise(ArgumentError, "Expect start to be <= stop")
|
133
|
+
stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
|
134
|
+
|
135
|
+
result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
|
136
|
+
end
|
137
|
+
|
138
|
+
case rlen.read_int
|
139
|
+
when -2 then raise "Invalid chromosome name: #{name}"
|
140
|
+
when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
|
141
|
+
end
|
142
|
+
|
143
|
+
result
|
144
|
+
end
|
145
|
+
|
146
|
+
alias qual fetch_qual
|
98
147
|
end
|
99
148
|
end
|