htslib 0.2.0 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -6
- data/lib/hts/bam/{aux.rb → auxi.rb} +6 -0
- data/lib/hts/bam/record.rb +1 -1
- data/lib/hts/bam.rb +46 -14
- data/lib/hts/bcf/header_record.rb +11 -0
- data/lib/hts/bcf/record.rb +1 -1
- data/lib/hts/bcf.rb +84 -11
- data/lib/hts/faidx.rb +40 -9
- data/lib/hts/hts.rb +11 -6
- data/lib/hts/libhts/constants.rb +20 -19
- data/lib/hts/libhts/cram.rb +64 -0
- data/lib/hts/libhts/sam.rb +12 -0
- data/lib/hts/libhts/sam_funcs.rb +60 -5
- data/lib/hts/libhts/tbx.rb +1 -1
- data/lib/hts/libhts/tbx_funcs.rb +25 -0
- data/lib/hts/libhts/vcf.rb +114 -5
- data/lib/hts/libhts/vcf_funcs.rb +81 -3
- data/lib/hts/libhts.rb +2 -1
- data/lib/hts/tbx.rb +37 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +15 -1
- metadata +7 -4
data/lib/hts/libhts/vcf_funcs.rb
CHANGED
@@ -87,105 +87,130 @@ module HTS
|
|
87
87
|
BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
|
88
88
|
|
89
89
|
class << self
|
90
|
+
# Get number of samples
|
90
91
|
def bcf_hdr_nsamples(hdr)
|
91
92
|
hdr[:n][BCF_DT_SAMPLE]
|
92
93
|
end
|
93
94
|
|
95
|
+
# Function for updating INFO fields
|
94
96
|
def bcf_update_info_int32(hdr, line, key, values, n)
|
95
97
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
|
96
98
|
end
|
97
99
|
|
100
|
+
# Function for updating INFO fields
|
98
101
|
def bcf_update_info_float(hdr, line, key, values, n)
|
99
102
|
bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
|
100
103
|
end
|
101
104
|
|
105
|
+
# Function for updating INFO fields
|
102
106
|
def bcf_update_info_flag(hdr, line, key, string, n)
|
103
107
|
bcf_update_info(hdr, line, key, string, n, BCF_HT_FLAG)
|
104
108
|
end
|
105
109
|
|
110
|
+
# Function for updating INFO fields
|
106
111
|
def bcf_update_info_string(hdr, line, key, string)
|
107
112
|
bcf_update_info(hdr, line, key, string, 1, BCF_HT_STR)
|
108
113
|
end
|
109
114
|
|
115
|
+
# Function for updating FORMAT fields
|
110
116
|
def bcf_update_format_int32(hdr, line, key, values, n)
|
111
117
|
bcf_update_format(hdr, line, key, values, n,
|
112
118
|
BCF_HT_INT)
|
113
119
|
end
|
114
120
|
|
121
|
+
# Function for updating FORMAT fields
|
115
122
|
def bcf_update_format_float(hdr, line, key, values, n)
|
116
123
|
bcf_update_format(hdr, line, key, values, n,
|
117
124
|
BCF_HT_REAL)
|
118
125
|
end
|
119
126
|
|
127
|
+
# Function for updating FORMAT fields
|
120
128
|
def bcf_update_format_char(hdr, line, key, values, n)
|
121
129
|
bcf_update_format(hdr, line, key, values, n,
|
122
130
|
BCF_HT_STR)
|
123
131
|
end
|
124
132
|
|
133
|
+
# Function for updating FORMAT fields
|
125
134
|
def bcf_update_genotypes(hdr, line, gts, n)
|
126
135
|
bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT)
|
127
136
|
end
|
128
137
|
|
138
|
+
# Macro for setting genotypes correctly
|
129
139
|
def bcf_gt_phased(idx)
|
130
140
|
((idx + 1) << 1 | 1)
|
131
141
|
end
|
132
142
|
|
143
|
+
# Macro for setting genotypes correctly
|
133
144
|
def bcf_gt_unphased(idx)
|
134
145
|
((idx + 1) << 1)
|
135
146
|
end
|
136
147
|
|
148
|
+
# Macro for setting genotypes correctly
|
137
149
|
def bcf_gt_missing
|
138
150
|
0
|
139
151
|
end
|
140
152
|
|
153
|
+
# Macro for setting genotypes correctly
|
141
154
|
# Tests whether a genotype value encodes a missing allele.
#
# The lowest bit of +val+ is the phase flag; the remaining bits hold
# (allele index + 1), so a value whose upper bits are all zero is missing.
#
# NOTE: the C macro this mirrors is `((val) >> 1 ? 0 : 1)`. A literal port is
# wrong in Ruby because every Integer (including 0) is truthy, which made the
# previous implementation always return 0. The zero test is explicit here.
#
# @param val [Integer] encoded genotype value
# @return [Integer] 1 if the allele is missing, 0 otherwise. The result is an
#   Integer, so compare against 1 (or 0) instead of relying on truthiness.
def bcf_gt_is_missing(val)
  (val >> 1).zero? ? 1 : 0
end
|
144
157
|
|
158
|
+
# Macro for checking the phase bit (lowest bit) of a genotype value
|
145
159
|
def bcf_gt_is_phased(idx)
|
146
|
-
(
|
160
|
+
(idx & 1)
|
147
161
|
end
|
148
162
|
|
163
|
+
# Macro for extracting the 0-based allele index from a genotype value
|
149
164
|
def bcf_gt_allele(val)
|
150
|
-
((
|
165
|
+
((val >> 1) - 1)
|
151
166
|
end
|
152
167
|
|
168
|
+
# Converts a pair of allele indexes to a Number=G genotype index (assuming diploid, all 0-based)
|
153
169
|
def bcf_alleles2gt(a, b)
|
154
170
|
((a) > (b) ? (a * (a + 1) / 2 + b) : (b * (b + 1) / 2 + a))
|
155
171
|
end
|
156
172
|
|
173
|
+
# Get INFO values
|
157
174
|
def bcf_get_info_int32(hdr, line, tag, dst, ndst)
|
158
175
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
|
159
176
|
end
|
160
177
|
|
178
|
+
# Get INFO values
|
161
179
|
def bcf_get_info_float(hdr, line, tag, dst, ndst)
|
162
180
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
|
163
181
|
end
|
164
182
|
|
183
|
+
# Get INFO values
|
165
184
|
def bcf_get_info_string(hdr, line, tag, dst, ndst)
|
166
185
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
|
167
186
|
end
|
168
187
|
|
188
|
+
# Get INFO values
|
169
189
|
def bcf_get_info_flag(hdr, line, tag, dst, ndst)
|
170
190
|
bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG)
|
171
191
|
end
|
172
192
|
|
193
|
+
# Get FORMAT values
|
173
194
|
def bcf_get_format_int32(hdr, line, tag, dst, ndst)
|
174
195
|
bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
|
175
196
|
end
|
176
197
|
|
198
|
+
# Get FORMAT values
|
177
199
|
def bcf_get_format_float(hdr, line, tag, dst, ndst)
|
178
200
|
bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
|
179
201
|
end
|
180
202
|
|
203
|
+
# Get FORMAT values
|
181
204
|
def bcf_get_format_char(hdr, line, tag, dst, ndst)
|
182
205
|
bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
|
183
206
|
end
|
184
207
|
|
208
|
+
# Get FORMAT values
|
185
209
|
def bcf_get_genotypes(hdr, line, dst, ndst)
|
186
210
|
bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT)
|
187
211
|
end
|
188
212
|
|
213
|
+
# Translates numeric ID into string
|
189
214
|
def bcf_hdr_int2id(hdr, type, int_id)
|
190
215
|
LibHTS::BcfIdpair.new(
|
191
216
|
hdr[:id][type].to_ptr +
|
@@ -193,10 +218,12 @@ module HTS
|
|
193
218
|
)[:key]
|
194
219
|
end
|
195
220
|
|
221
|
+
# Translates sequence names (chromosomes) into numeric ID
|
196
222
|
def bcf_hdr_name2id(hdr, id)
|
197
223
|
bcf_hdr_id2int(hdr, BCF_DT_CTG, id)
|
198
224
|
end
|
199
225
|
|
226
|
+
# Translates numeric ID to sequence name
|
200
227
|
def bcf_hdr_id2name(hdr, rid)
|
201
228
|
return nil if hdr.null? || rid < 0 || rid >= hdr[:n][LibHTS::BCF_DT_CTG]
|
202
229
|
|
@@ -206,6 +233,7 @@ module HTS
|
|
206
233
|
)[:key]
|
207
234
|
end
|
208
235
|
|
236
|
+
# Macro for accessing bcf_idinfo_t
|
209
237
|
def bcf_hdr_id2length(hdr, type, int_id)
|
210
238
|
LibHTS::BcfIdpair.new(
|
211
239
|
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
@@ -213,6 +241,7 @@ module HTS
|
|
213
241
|
)[:val][:info][type] >> 8 & 0xf
|
214
242
|
end
|
215
243
|
|
244
|
+
# Macro for accessing bcf_idinfo_t
|
216
245
|
def bcf_hdr_id2number(hdr, type, int_id)
|
217
246
|
LibHTS::BcfIdpair.new(
|
218
247
|
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
@@ -220,6 +249,7 @@ module HTS
|
|
220
249
|
)[:val][:info][type] >> 12
|
221
250
|
end
|
222
251
|
|
252
|
+
# Macro for accessing bcf_idinfo_t
|
223
253
|
def bcf_hdr_id2type(hdr, type, int_id)
|
224
254
|
LibHTS::BcfIdpair.new(
|
225
255
|
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
@@ -227,12 +257,60 @@ module HTS
|
|
227
257
|
)[:val][:info][type] >> 4 & 0xf
|
228
258
|
end
|
229
259
|
|
260
|
+
# Macro for accessing bcf_idinfo_t
|
230
261
|
def bcf_hdr_id2coltype(hdr, type, int_id)
|
231
262
|
LibHTS::BcfIdpair.new(
|
232
263
|
hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
|
233
264
|
LibHTS::BcfIdpair.size * int_id # offset
|
234
265
|
)[:val][:info][type] & 0xf
|
235
266
|
end
|
267
|
+
|
268
|
+
# def bcf_hdr_idinfo_exists
|
269
|
+
|
270
|
+
# def bcf_hdr_id2hrec
|
271
|
+
|
272
|
+
alias bcf_itr_destroy hts_itr_destroy
|
273
|
+
|
274
|
+
def bcf_itr_queryi(idx, tid, beg, _end)
|
275
|
+
hts_itr_query(idx, tid, beg, _end, @@bcf_readrec)
|
276
|
+
end
|
277
|
+
|
278
|
+
@@bcf_hdr_name2id = proc do |hdr, id|
|
279
|
+
LibHTS.bcf_hdr_name2id(hdr, id)
|
280
|
+
end
|
281
|
+
|
282
|
+
def bcf_itr_querys(idx, hdr, s)
|
283
|
+
hts_itr_querys(idx, s, @@bcf_hdr_name2id, hdr, @@hts_itr_query, @@bcf_readrec)
|
284
|
+
end
|
285
|
+
|
286
|
+
# Load a BCF index
|
287
|
+
def bcf_index_load(fn)
|
288
|
+
hts_idx_load(fn, HTS_FMT_CSI)
|
289
|
+
end
|
290
|
+
|
291
|
+
# Get the list of sequence names from a BCF index
|
292
|
+
def bcf_index_seqnames(idx, hdr, nptr)
|
293
|
+
hts_idx_seqnames(idx, nptr, @@bcf_hdr_id2name, hdr)
|
294
|
+
end
|
295
|
+
|
296
|
+
# Typed value I/O
|
297
|
+
def bcf_int8_vector_end = -127 # INT8_MIN + 1
|
298
|
+
def bcf_int16_vector_end = -32_767 # INT16_MIN + 1
|
299
|
+
def bcf_int32_vector_end = -2_147_483_647 # INT32_MIN + 1
|
300
|
+
def bcf_int64_vector_end = -9_223_372_036_854_775_807 # INT64_MIN + 1
|
301
|
+
def bcf_str_vector_end = 0
|
302
|
+
def bcf_int8_missing = -128 # INT8_MIN
|
303
|
+
def bcf_int16_missing = (-32_767 - 1) # INT16_MIN
|
304
|
+
def bcf_int32_missing = (-2_147_483_647 - 1) # INT32_MIN
|
305
|
+
def bcf_int64_missing = (-9_223_372_036_854_775_807 - 1) # INT64_MIN
|
306
|
+
def bcf_str_missing = 0x07
|
307
|
+
|
308
|
+
BCF_MAX_BT_INT8 = 0x7f # INT8_MAX */
|
309
|
+
BCF_MAX_BT_INT16 = 0x7fff # INT16_MAX */
|
310
|
+
BCF_MAX_BT_INT32 = 0x7fffffff # INT32_MAX */
|
311
|
+
BCF_MIN_BT_INT8 = -120 # INT8_MIN + 8 */
|
312
|
+
BCF_MIN_BT_INT16 = -32_760 # INT16_MIN + 8 */
|
313
|
+
BCF_MIN_BT_INT32 = -2_147_483_640 # INT32_MIN + 8 */
|
236
314
|
end
|
237
315
|
end
|
238
316
|
end
|
data/lib/hts/libhts.rb
CHANGED
@@ -15,12 +15,13 @@ module HTS
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# @!macro attach_function
|
18
|
+
# @!scope class
|
18
19
|
# @!method $1(${2--2})
|
19
20
|
# @return [${-1}] the return value of $0
|
20
21
|
def self.attach_function(*)
|
21
22
|
super
|
22
23
|
rescue FFI::NotFoundError => e
|
23
|
-
warn e.message
|
24
|
+
warn e.message if $VERBOSE
|
24
25
|
end
|
25
26
|
end
|
26
27
|
end
|
data/lib/hts/tbx.rb
CHANGED
@@ -8,7 +8,7 @@ module HTS
|
|
8
8
|
class Tbx < Hts
|
9
9
|
include Enumerable
|
10
10
|
|
11
|
-
attr_reader :file_name
|
11
|
+
attr_reader :file_name, :index_name, :mode, :nthreads
|
12
12
|
|
13
13
|
def self.open(*args, **kw)
|
14
14
|
file = new(*args, **kw) # do not yield
|
@@ -22,24 +22,56 @@ module HTS
|
|
22
22
|
file
|
23
23
|
end
|
24
24
|
|
25
|
-
def initialize(file_name, threads: nil)
|
25
|
+
def initialize(file_name, mode = "r", index: nil, threads: nil, build_index: false)
|
26
26
|
if block_given?
|
27
27
|
message = "HTS::Tbx.new() dose not take block; Please use HTS::Tbx.open() instead"
|
28
28
|
raise message
|
29
29
|
end
|
30
30
|
|
31
|
-
@file_name = file_name
|
32
|
-
|
33
31
|
# NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
|
34
32
|
|
35
|
-
@
|
33
|
+
@file_name = file_name
|
34
|
+
@index_name = index
|
35
|
+
@mode = mode
|
36
|
+
@nthreads = threads
|
36
37
|
@hts_file = LibHTS.hts_open(@file_name, @mode)
|
37
38
|
|
38
39
|
raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
|
39
40
|
|
40
41
|
set_threads(threads) if threads
|
41
42
|
|
43
|
+
# return if @mode[0] == "w"
|
44
|
+
raise "Not implemented" if @mode[0] == "w"
|
45
|
+
|
46
|
+
# build_index(index) if build_index
|
47
|
+
@idx = load_index(index)
|
48
|
+
|
42
49
|
super # do nothing
|
43
50
|
end
|
51
|
+
|
52
|
+
def build_index
|
53
|
+
raise "Not implemented yet"
|
54
|
+
end
|
55
|
+
|
56
|
+
def load_index(index_name = nil)
|
57
|
+
if index_name
|
58
|
+
LibHTS.tbx_index_load2(@file_name, index_name)
|
59
|
+
else
|
60
|
+
LibHTS.tbx_index_load3(@file_name, nil, 2)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def tid(name)
|
65
|
+
LibHTS.tbx_name2id(@idx, name)
|
66
|
+
end
|
67
|
+
|
68
|
+
def seqnames
|
69
|
+
nseq = FFI::MemoryPointer.new(:int)
|
70
|
+
LibHTS.tbx_seqnames(@idx, nseq).then do |pts|
|
71
|
+
pts.read_array_of_pointer(nseq.read_int).map do |pt|
|
72
|
+
pt.read_string
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
44
76
|
end
|
45
77
|
end
|
data/lib/hts/version.rb
CHANGED
data/lib/htslib.rb
CHANGED
@@ -30,9 +30,23 @@ module HTS
|
|
30
30
|
|
31
31
|
warn "htslib shared library '#{name}' not found."
|
32
32
|
end
|
33
|
+
|
34
|
+
def search_htslib_windows
|
35
|
+
ENV["HTSLIBDIR"] ||= [
|
36
|
+
RubyInstaller::Runtime.msys2_installation.msys_path,
|
37
|
+
RubyInstaller::Runtime.msys2_installation.mingwarch
|
38
|
+
].join(File::ALT_SEPARATOR)
|
39
|
+
path = File.expand_path("bin/hts-3.dll", ENV["HTSLIBDIR"])
|
40
|
+
RubyInstaller::Runtime.add_dll_directory(File.dirname(path))
|
41
|
+
path
|
42
|
+
end
|
33
43
|
end
|
34
44
|
|
35
|
-
self.lib_path =
|
45
|
+
self.lib_path = if Object.const_defined?(:RubyInstaller)
|
46
|
+
search_htslib_windows
|
47
|
+
else
|
48
|
+
search_htslib
|
49
|
+
end
|
36
50
|
|
37
51
|
# You can change the path of the shared library with `HTS.lib_path=`
|
38
52
|
# before calling the LibHTS module.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htslib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -132,7 +132,7 @@ files:
|
|
132
132
|
- LICENSE.txt
|
133
133
|
- README.md
|
134
134
|
- lib/hts/bam.rb
|
135
|
-
- lib/hts/bam/
|
135
|
+
- lib/hts/bam/auxi.rb
|
136
136
|
- lib/hts/bam/cigar.rb
|
137
137
|
- lib/hts/bam/flag.rb
|
138
138
|
- lib/hts/bam/header.rb
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/hts/bcf.rb
|
141
141
|
- lib/hts/bcf/format.rb
|
142
142
|
- lib/hts/bcf/header.rb
|
143
|
+
- lib/hts/bcf/header_record.rb
|
143
144
|
- lib/hts/bcf/info.rb
|
144
145
|
- lib/hts/bcf/record.rb
|
145
146
|
- lib/hts/faidx.rb
|
@@ -158,6 +159,7 @@ files:
|
|
158
159
|
- lib/hts/libhts/sam.rb
|
159
160
|
- lib/hts/libhts/sam_funcs.rb
|
160
161
|
- lib/hts/libhts/tbx.rb
|
162
|
+
- lib/hts/libhts/tbx_funcs.rb
|
161
163
|
- lib/hts/libhts/thread_pool.rb
|
162
164
|
- lib/hts/libhts/vcf.rb
|
163
165
|
- lib/hts/libhts/vcf_funcs.rb
|
@@ -167,7 +169,8 @@ files:
|
|
167
169
|
homepage: https://github.com/kojix2/ruby-htslib
|
168
170
|
licenses:
|
169
171
|
- MIT
|
170
|
-
metadata:
|
172
|
+
metadata:
|
173
|
+
msys2_mingw_dependencies: htslib
|
171
174
|
post_install_message:
|
172
175
|
rdoc_options: []
|
173
176
|
require_paths:
|