htslib 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -10
- data/lib/hts/bam.rb +8 -8
- data/lib/hts/bam/alignment.rb +0 -1
- data/lib/hts/fai.rb +43 -4
- data/lib/hts/ffi.rb +46 -4
- data/lib/hts/ffi/bgzf.rb +1 -1
- data/lib/hts/ffi/constants.rb +30 -41
- data/lib/hts/ffi/hfile.rb +1 -1
- data/lib/hts/ffi/hts.rb +2 -2
- data/lib/hts/ffi/sam.rb +39 -8
- data/lib/hts/ffi/vcf.rb +80 -11
- data/lib/hts/vcf.rb +21 -11
- data/lib/hts/vcf/header.rb +24 -0
- data/lib/hts/vcf/variant.rb +43 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +24 -6
- metadata +13 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d4fe8c4a8f710ee3b35793e997f394ba8f80e0fe5b507768de2af2c5ab1297a0
|
|
4
|
+
data.tar.gz: 1bb06ced745342f4de8665f046304364c9d419ae05eee355c3cd852d6b2b454a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8c7fb677d2462a1ddf4cd146d9f9962b570f39762e98d893d77f89bbbf61c92329ed8391030f772bf4f98fb3dc874edc3c365dd2d85266e1e966b24effc1715c
|
|
7
|
+
data.tar.gz: fee9ec647e3ef51e83a1aec1c62ac02c796dcfefe4d5fd6c901e4b04e4253619e7337cee651df1ece9ca58c74a7a50620befd5cd8ab823696d0f893a437bd562
|
data/README.md
CHANGED
|
@@ -4,12 +4,19 @@
|
|
|
4
4
|

|
|
5
5
|
[](LICENSE.txt)
|
|
6
6
|
[](https://zenodo.org/badge/latestdoi/247078205)
|
|
7
|
+
[](https://rubydoc.info/gems/htslib)
|
|
7
8
|
|
|
8
9
|
:dna: [HTSlib](https://github.com/samtools/htslib) - high-throughput sequencing data manipulation - for Ruby
|
|
9
10
|
|
|
10
11
|
:apple: Feel free to fork it if you would like to develop it!
|
|
11
12
|
|
|
12
|
-
:bowtie: Just a prototype.
|
|
13
|
+
:bowtie: Just a prototype. Pre-alpha stage.
|
|
14
|
+
|
|
15
|
+
## Requirements
|
|
16
|
+
|
|
17
|
+
* [htslib](https://github.com/samtools/htslib)
|
|
18
|
+
* Ubuntu : `apt install libhts-dev`
|
|
19
|
+
* macOS : `brew install htslib`
|
|
13
20
|
|
|
14
21
|
## Installation
|
|
15
22
|
|
|
@@ -17,16 +24,14 @@
|
|
|
17
24
|
gem install htslib
|
|
18
25
|
```
|
|
19
26
|
|
|
20
|
-
|
|
27
|
+
If you installed htslib with Ubuntu/apt or Mac/homebrew, pkg-config will automatically detect the location of the shared library.
|
|
28
|
+
|
|
29
|
+
Or you can set the environment variable `HTSLIBDIR`.
|
|
21
30
|
|
|
22
31
|
```sh
|
|
23
|
-
export HTSLIBDIR="/your/path/to/htslib"
|
|
32
|
+
export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
|
24
33
|
```
|
|
25
34
|
|
|
26
|
-
## Requirements
|
|
27
|
-
|
|
28
|
-
* [htslib](https://github.com/samtools/htslib)
|
|
29
|
-
|
|
30
35
|
## Usage
|
|
31
36
|
|
|
32
37
|
HTS::FFI - Low-level API
|
|
@@ -42,6 +47,23 @@ p b[:format]
|
|
|
42
47
|
|
|
43
48
|
A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) is under development.
|
|
44
49
|
|
|
50
|
+
```ruby
|
|
51
|
+
require 'htslib'
|
|
52
|
+
|
|
53
|
+
bam = HTS::Bam.new("a.bam")
|
|
54
|
+
|
|
55
|
+
bam.each do |aln|
|
|
56
|
+
p name: aln.qname,
|
|
57
|
+
flag: aln.flag,
|
|
58
|
+
start: aln.start + 1,
|
|
59
|
+
mpos: aln.mate_pos + 1,
|
|
60
|
+
mqual: aln.mapping_quality,
|
|
61
|
+
seq: aln.sequence,
|
|
62
|
+
cigar: aln.cigar.to_s,
|
|
63
|
+
qual: aln.base_qualities.map { |i| (i + 33).chr }.join
|
|
64
|
+
end
|
|
65
|
+
```
|
|
66
|
+
|
|
45
67
|
## Documentation
|
|
46
68
|
|
|
47
69
|
* [RubyDoc.info - HTSlib](https://rdoc.info/gems/htslib)
|
|
@@ -51,15 +73,17 @@ A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python
|
|
|
51
73
|
To get started with development
|
|
52
74
|
|
|
53
75
|
```sh
|
|
54
|
-
git clone --
|
|
76
|
+
git clone --recursive https://github.com/kojix2/ruby-htslib
|
|
55
77
|
cd ruby-htslib
|
|
56
78
|
bundle install
|
|
57
|
-
bundle exec rake htslib:
|
|
58
|
-
bundle exec rake
|
|
79
|
+
bundle exec rake htslib:build
|
|
80
|
+
bundle exec rake test
|
|
59
81
|
```
|
|
60
82
|
|
|
61
83
|
## Contributing
|
|
62
84
|
|
|
85
|
+
Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
|
|
86
|
+
|
|
63
87
|
* [Report bugs](https://github.com/kojix2/ruby-htslib/issues)
|
|
64
88
|
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
|
65
89
|
* Write, clarify, or fix documentation
|
data/lib/hts/bam.rb
CHANGED
|
@@ -10,20 +10,20 @@ require_relative 'bam/alignment'
|
|
|
10
10
|
module HTS
|
|
11
11
|
class Bam
|
|
12
12
|
include Enumerable
|
|
13
|
-
attr_reader :
|
|
13
|
+
attr_reader :file_path, :mode, :header, :htf
|
|
14
14
|
|
|
15
|
-
def initialize(
|
|
16
|
-
@
|
|
17
|
-
File.exist?(@
|
|
15
|
+
def initialize(file_path, mode = 'r', create_index: nil, header: nil, fasta: nil)
|
|
16
|
+
@file_path = File.expand_path(file_path)
|
|
17
|
+
File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
|
|
18
18
|
|
|
19
19
|
@mode = mode
|
|
20
|
-
@htf = FFI.hts_open(@
|
|
20
|
+
@htf = FFI.hts_open(@file_path, mode)
|
|
21
21
|
|
|
22
22
|
if mode[0] == 'r'
|
|
23
|
-
@idx = FFI.sam_index_load(@htf, @
|
|
23
|
+
@idx = FFI.sam_index_load(@htf, @file_path)
|
|
24
24
|
if (@idx.null? && create_index.nil?) || create_index
|
|
25
|
-
FFI.sam_index_build(
|
|
26
|
-
@idx = FFI.sam_index_load(@htf, @
|
|
25
|
+
FFI.sam_index_build(file_path, -1)
|
|
26
|
+
@idx = FFI.sam_index_load(@htf, @file_path)
|
|
27
27
|
warn 'NO querying'
|
|
28
28
|
end
|
|
29
29
|
@header = Bam::Header.new(FFI.sam_hdr_read(@htf))
|
data/lib/hts/bam/alignment.rb
CHANGED
data/lib/hts/fai.rb
CHANGED
|
@@ -5,13 +5,52 @@
|
|
|
5
5
|
|
|
6
6
|
module HTS
|
|
7
7
|
class Fai
|
|
8
|
-
|
|
8
|
+
# FIXME: API
|
|
9
|
+
def self.open(path)
|
|
10
|
+
fai = new(path)
|
|
11
|
+
if block_given?
|
|
12
|
+
yield(fai)
|
|
13
|
+
fai.close
|
|
14
|
+
else
|
|
15
|
+
fai
|
|
16
|
+
end
|
|
17
|
+
end
|
|
9
18
|
|
|
10
|
-
|
|
19
|
+
def initialize(path)
|
|
20
|
+
@path = File.expand_path(path)
|
|
21
|
+
@path.delete_suffix!('.fai')
|
|
22
|
+
FFI.fai_build(@path) unless File.exist?("#{@path}.fai")
|
|
23
|
+
@fai = FFI.fai_load(@path)
|
|
24
|
+
raise if @fai.null?
|
|
11
25
|
|
|
12
|
-
|
|
26
|
+
# at_exit{FFI.fai_destroy(@fai)}
|
|
27
|
+
end
|
|
13
28
|
|
|
14
|
-
def
|
|
29
|
+
def close
|
|
30
|
+
FFI.fai_destroy(@fai)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# the number of sequences in the index.
|
|
34
|
+
def size
|
|
35
|
+
FFI.faidx_nseq(@fai)
|
|
36
|
+
end
|
|
37
|
+
alias length size
|
|
38
|
+
|
|
39
|
+
# return the length of the requested chromosome.
|
|
40
|
+
def chrom_size(chrom)
|
|
41
|
+
raise ArgumentError, 'Expect chrom to be String or Symbol' unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
|
42
|
+
|
|
43
|
+
chrom = chrom.to_s
|
|
44
|
+
result = FFI.faidx_seq_len(@fai, chrom)
|
|
45
|
+
result == -1 ? nil : result
|
|
46
|
+
end
|
|
47
|
+
alias chrom_length chrom_size
|
|
48
|
+
|
|
49
|
+
# FIXME: naming and syntax
|
|
50
|
+
def cget; end
|
|
51
|
+
|
|
52
|
+
# FIXME: naming and syntax
|
|
53
|
+
def get; end
|
|
15
54
|
|
|
16
55
|
# __iter__
|
|
17
56
|
end
|
data/lib/hts/ffi.rb
CHANGED
|
@@ -20,12 +20,54 @@ end
|
|
|
20
20
|
|
|
21
21
|
module FFI
|
|
22
22
|
class Struct
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
class << self
|
|
24
|
+
def union_layout(*args)
|
|
25
|
+
Class.new(FFI::Union) { layout(*args) }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def struct_layout(*args)
|
|
29
|
+
Class.new(FFI::Struct) { layout(*args) }
|
|
30
|
+
end
|
|
25
31
|
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
class BitStruct < Struct
|
|
35
|
+
class << self
|
|
36
|
+
module BitFieldsModule
|
|
37
|
+
def [](name)
|
|
38
|
+
bit_fields = self.class.bit_fields_hash_table
|
|
39
|
+
parent, start, width = bit_fields[name]
|
|
40
|
+
if parent
|
|
41
|
+
(super(parent) >> start) & ((1 << width) - 1)
|
|
42
|
+
else
|
|
43
|
+
super(name)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
private_constant :BitFieldsModule
|
|
48
|
+
|
|
49
|
+
attr_reader :bit_fields_hash_table
|
|
50
|
+
|
|
51
|
+
def bitfields(*args)
|
|
52
|
+
unless instance_variable_defined?(:@bit_fields_hash_table)
|
|
53
|
+
@bit_fields_hash_table = {}
|
|
54
|
+
prepend BitFieldsModule
|
|
55
|
+
end
|
|
26
56
|
|
|
27
|
-
|
|
28
|
-
|
|
57
|
+
parent = args.shift
|
|
58
|
+
labels = []
|
|
59
|
+
widths = []
|
|
60
|
+
args.each_slice(2) do |l, w|
|
|
61
|
+
labels << l
|
|
62
|
+
widths << w
|
|
63
|
+
end
|
|
64
|
+
starts = widths.inject([0]) do |result, w|
|
|
65
|
+
result << (result.last + w)
|
|
66
|
+
end
|
|
67
|
+
labels.zip(starts, widths).each do |l, s, w|
|
|
68
|
+
@bit_fields_hash_table[l] = [parent, s, w]
|
|
69
|
+
end
|
|
70
|
+
end
|
|
29
71
|
end
|
|
30
72
|
end
|
|
31
73
|
end
|
data/lib/hts/ffi/bgzf.rb
CHANGED
data/lib/hts/ffi/constants.rb
CHANGED
|
@@ -8,13 +8,23 @@ module HTS
|
|
|
8
8
|
|
|
9
9
|
# kstring
|
|
10
10
|
|
|
11
|
-
class
|
|
11
|
+
class KString < ::FFI::Struct
|
|
12
12
|
layout \
|
|
13
13
|
:l, :size_t,
|
|
14
14
|
:m, :size_t,
|
|
15
15
|
:s, :string
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
+
class KSeq < ::FFI::Struct
|
|
19
|
+
layout \
|
|
20
|
+
:name, KString,
|
|
21
|
+
:comment, KString,
|
|
22
|
+
:seq, KString,
|
|
23
|
+
:qual, KString,
|
|
24
|
+
:last_char, :int,
|
|
25
|
+
:f, :pointer # FIXME
|
|
26
|
+
end
|
|
27
|
+
|
|
18
28
|
# BGZF
|
|
19
29
|
class BGZF < ::FFI::Struct
|
|
20
30
|
layout \
|
|
@@ -162,7 +172,7 @@ module HTS
|
|
|
162
172
|
layout \
|
|
163
173
|
:bitfields, :uint32, # FIXME
|
|
164
174
|
:lineno, :int64,
|
|
165
|
-
:line,
|
|
175
|
+
:line, KString,
|
|
166
176
|
:fn, :string,
|
|
167
177
|
:fn_aux, :string,
|
|
168
178
|
:fp,
|
|
@@ -259,36 +269,6 @@ module HTS
|
|
|
259
269
|
class BamMplp < ::FFI::Struct
|
|
260
270
|
end
|
|
261
271
|
|
|
262
|
-
BAM_CMATCH = 0
|
|
263
|
-
BAM_CINS = 1
|
|
264
|
-
BAM_CDEL = 2
|
|
265
|
-
BAM_CREF_SKIP = 3
|
|
266
|
-
BAM_CSOFT_CLIP = 4
|
|
267
|
-
BAM_CHARD_CLIP = 5
|
|
268
|
-
BAM_CPAD = 6
|
|
269
|
-
BAM_CEQUAL = 7
|
|
270
|
-
BAM_CDIFF = 8
|
|
271
|
-
BAM_CBACK = 9
|
|
272
|
-
|
|
273
|
-
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
|
274
|
-
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
|
275
|
-
BAM_CIGAR_SHIFT = 4
|
|
276
|
-
BAM_CIGAR_MASK = 0xf
|
|
277
|
-
BAM_CIGAR_TYPE = 0x3C1A7
|
|
278
|
-
|
|
279
|
-
BAM_FPAIRED = 1
|
|
280
|
-
BAM_FPROPER_PAIR = 2
|
|
281
|
-
BAM_FUNMAP = 4
|
|
282
|
-
BAM_FMUNMAP = 8
|
|
283
|
-
BAM_FREVERSE = 16
|
|
284
|
-
BAM_FMREVERSE = 32
|
|
285
|
-
BAM_FREAD1 = 64
|
|
286
|
-
BAM_FREAD2 = 128
|
|
287
|
-
BAM_FSECONDARY = 256
|
|
288
|
-
BAM_FQCFAIL = 512
|
|
289
|
-
BAM_FDUP = 1024
|
|
290
|
-
BAM_FSUPPLEMENTARY = 2048
|
|
291
|
-
|
|
292
272
|
class TbxConf < ::FFI::Struct
|
|
293
273
|
layout \
|
|
294
274
|
:preset, :int32,
|
|
@@ -386,10 +366,10 @@ module HTS
|
|
|
386
366
|
:nhrec, :int,
|
|
387
367
|
:dirty, :int,
|
|
388
368
|
:ntransl, :int,
|
|
389
|
-
:transl, :pointer,
|
|
369
|
+
:transl, [:pointer, 2],
|
|
390
370
|
:nsamples_ori, :int,
|
|
391
371
|
:keep_samples, :pointer,
|
|
392
|
-
:mem,
|
|
372
|
+
:mem, KString,
|
|
393
373
|
:m, [:int, 3]
|
|
394
374
|
end
|
|
395
375
|
|
|
@@ -401,9 +381,10 @@ module HTS
|
|
|
401
381
|
:m_als, :int,
|
|
402
382
|
:m_allele, :int,
|
|
403
383
|
:m_flt, :int,
|
|
384
|
+
:n_flt, :int,
|
|
404
385
|
:flt, :pointer,
|
|
405
386
|
:id, :string,
|
|
406
|
-
:als, :string
|
|
387
|
+
:als, :pointer, # (\\0-separated string)
|
|
407
388
|
:allele, :pointer,
|
|
408
389
|
:info, BcfInfo.ptr,
|
|
409
390
|
:fmt, BcfFmt.ptr,
|
|
@@ -414,21 +395,29 @@ module HTS
|
|
|
414
395
|
:indiv_dirty, :int
|
|
415
396
|
end
|
|
416
397
|
|
|
417
|
-
class Bcf1 < ::FFI::
|
|
398
|
+
class Bcf1 < ::FFI::BitStruct
|
|
418
399
|
layout \
|
|
419
400
|
:pos, :hts_pos_t,
|
|
420
401
|
:rlen, :hts_pos_t,
|
|
421
|
-
:rid, :
|
|
402
|
+
:rid, :int32_t,
|
|
422
403
|
:qual, :float,
|
|
423
|
-
:
|
|
424
|
-
:
|
|
425
|
-
:shared,
|
|
426
|
-
:indiv,
|
|
404
|
+
:n_info_allele, :uint32_t, # FIXME
|
|
405
|
+
:n_fmt_sample, :uint32_t, # FIXME
|
|
406
|
+
:shared, KString,
|
|
407
|
+
:indiv, KString,
|
|
427
408
|
:d, BcfDec,
|
|
428
409
|
:max_unpack, :int,
|
|
429
410
|
:unpacked, :int,
|
|
430
411
|
:unpack_size, [:int, 3],
|
|
431
412
|
:errcode, :int
|
|
413
|
+
|
|
414
|
+
bitfields :n_info_allele,
|
|
415
|
+
:n_info, 16,
|
|
416
|
+
:n_allele, 16
|
|
417
|
+
|
|
418
|
+
bitfields :n_fmt_sample,
|
|
419
|
+
:n_fmt, 8,
|
|
420
|
+
:n_sample, 24
|
|
432
421
|
end
|
|
433
422
|
end
|
|
434
423
|
end
|
data/lib/hts/ffi/hfile.rb
CHANGED
data/lib/hts/ffi/hts.rb
CHANGED
|
@@ -8,7 +8,7 @@ module HTS
|
|
|
8
8
|
|
|
9
9
|
attach_function \
|
|
10
10
|
:hts_lib_shutdown,
|
|
11
|
-
[
|
|
11
|
+
[],
|
|
12
12
|
:void
|
|
13
13
|
|
|
14
14
|
attach_function \
|
|
@@ -110,7 +110,7 @@ module HTS
|
|
|
110
110
|
# Read a line (and its \n or \r\n terminator) from a file
|
|
111
111
|
attach_function \
|
|
112
112
|
:hts_getline,
|
|
113
|
-
[HtsFile, :int,
|
|
113
|
+
[HtsFile, :int, KString],
|
|
114
114
|
:int
|
|
115
115
|
|
|
116
116
|
attach_function \
|
data/lib/hts/ffi/sam.rb
CHANGED
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
module HTS
|
|
4
4
|
module FFI
|
|
5
|
+
# constants
|
|
6
|
+
BAM_CMATCH = 0
|
|
7
|
+
BAM_CINS = 1
|
|
8
|
+
BAM_CDEL = 2
|
|
9
|
+
BAM_CREF_SKIP = 3
|
|
10
|
+
BAM_CSOFT_CLIP = 4
|
|
11
|
+
BAM_CHARD_CLIP = 5
|
|
12
|
+
BAM_CPAD = 6
|
|
13
|
+
BAM_CEQUAL = 7
|
|
14
|
+
BAM_CDIFF = 8
|
|
15
|
+
BAM_CBACK = 9
|
|
16
|
+
|
|
17
|
+
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
|
18
|
+
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
|
19
|
+
BAM_CIGAR_SHIFT = 4
|
|
20
|
+
BAM_CIGAR_MASK = 0xf
|
|
21
|
+
BAM_CIGAR_TYPE = 0x3C1A7
|
|
22
|
+
|
|
5
23
|
# macros
|
|
6
24
|
class << self
|
|
7
25
|
def bam_cigar_op(c)
|
|
@@ -13,7 +31,7 @@ module HTS
|
|
|
13
31
|
end
|
|
14
32
|
|
|
15
33
|
def bam_cigar_opchr(c)
|
|
16
|
-
(BAM_CIGAR_STR
|
|
34
|
+
("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
|
|
17
35
|
end
|
|
18
36
|
|
|
19
37
|
def bam_cigar_gen(l, o)
|
|
@@ -25,6 +43,19 @@ module HTS
|
|
|
25
43
|
end
|
|
26
44
|
end
|
|
27
45
|
|
|
46
|
+
BAM_FPAIRED = 1
|
|
47
|
+
BAM_FPROPER_PAIR = 2
|
|
48
|
+
BAM_FUNMAP = 4
|
|
49
|
+
BAM_FMUNMAP = 8
|
|
50
|
+
BAM_FREVERSE = 16
|
|
51
|
+
BAM_FMREVERSE = 32
|
|
52
|
+
BAM_FREAD1 = 64
|
|
53
|
+
BAM_FREAD2 = 128
|
|
54
|
+
BAM_FSECONDARY = 256
|
|
55
|
+
BAM_FQCFAIL = 512
|
|
56
|
+
BAM_FDUP = 1024
|
|
57
|
+
BAM_FSUPPLEMENTARY = 2048
|
|
58
|
+
|
|
28
59
|
# macros
|
|
29
60
|
# function-like macros
|
|
30
61
|
class << self
|
|
@@ -148,13 +179,13 @@ module HTS
|
|
|
148
179
|
# Returns a complete line of formatted text for a given type and ID.
|
|
149
180
|
attach_function \
|
|
150
181
|
:sam_hdr_find_line_id,
|
|
151
|
-
[SamHdr, :string, :string, :string,
|
|
182
|
+
[SamHdr, :string, :string, :string, KString],
|
|
152
183
|
:int
|
|
153
184
|
|
|
154
185
|
# Returns a complete line of formatted text for a given type and index.
|
|
155
186
|
attach_function \
|
|
156
187
|
:sam_hdr_find_line_pos,
|
|
157
|
-
[SamHdr, :string, :int,
|
|
188
|
+
[SamHdr, :string, :int, KString],
|
|
158
189
|
:int
|
|
159
190
|
|
|
160
191
|
# Remove a line with given type / id from a header
|
|
@@ -208,13 +239,13 @@ module HTS
|
|
|
208
239
|
# Return the value associated with a key for a header line identified by ID_key:ID_val
|
|
209
240
|
attach_function \
|
|
210
241
|
:sam_hdr_find_tag_id,
|
|
211
|
-
[SamHdr, :string, :string, :string, :string,
|
|
242
|
+
[SamHdr, :string, :string, :string, :string, KString],
|
|
212
243
|
:int
|
|
213
244
|
|
|
214
245
|
# Return the value associated with a key for a header line identified by position
|
|
215
246
|
attach_function \
|
|
216
247
|
:sam_hdr_find_tag_pos,
|
|
217
|
-
[SamHdr, :string, :int, :string,
|
|
248
|
+
[SamHdr, :string, :int, :string, KString],
|
|
218
249
|
:int
|
|
219
250
|
|
|
220
251
|
# Remove the key from the line identified by type, ID_key and ID_value.
|
|
@@ -445,12 +476,12 @@ module HTS
|
|
|
445
476
|
|
|
446
477
|
attach_function \
|
|
447
478
|
:sam_parse1,
|
|
448
|
-
[
|
|
479
|
+
[KString, SamHdr, Bam1],
|
|
449
480
|
:int
|
|
450
481
|
|
|
451
482
|
attach_function \
|
|
452
483
|
:sam_format1,
|
|
453
|
-
[SamHdr, Bam1,
|
|
484
|
+
[SamHdr, Bam1, KString],
|
|
454
485
|
:int
|
|
455
486
|
|
|
456
487
|
# Read a record from a file
|
|
@@ -598,7 +629,7 @@ module HTS
|
|
|
598
629
|
# sets a callback to initialise any per-pileup1_t fields.
|
|
599
630
|
attach_function \
|
|
600
631
|
:bam_plp_insertion,
|
|
601
|
-
[:pointer,
|
|
632
|
+
[:pointer, KString, :pointer],
|
|
602
633
|
:int
|
|
603
634
|
|
|
604
635
|
# sets a callback to initialise any per-pileup1_t fields.
|
data/lib/hts/ffi/vcf.rb
CHANGED
|
@@ -2,6 +2,75 @@
|
|
|
2
2
|
|
|
3
3
|
module HTS
|
|
4
4
|
module FFI
|
|
5
|
+
# constants
|
|
6
|
+
BCF_HL_FLT = 0 # header line
|
|
7
|
+
BCF_HL_INFO = 1
|
|
8
|
+
BCF_HL_FMT = 2
|
|
9
|
+
BCF_HL_CTG = 3
|
|
10
|
+
BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
|
|
11
|
+
BCF_HL_GEN = 5 # generic header line
|
|
12
|
+
BCF_HT_FLAG = 0 # header type
|
|
13
|
+
|
|
14
|
+
BCF_HT_INT = 1
|
|
15
|
+
BCF_HT_REAL = 2
|
|
16
|
+
BCF_HT_STR = 3
|
|
17
|
+
BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
|
|
18
|
+
|
|
19
|
+
BCF_VL_FIXED = 0 # variable length
|
|
20
|
+
BCF_VL_VAR = 1
|
|
21
|
+
BCF_VL_A = 2
|
|
22
|
+
BCF_VL_G = 3
|
|
23
|
+
BCF_VL_R = 4
|
|
24
|
+
|
|
25
|
+
BCF_DT_ID = 0 # dictionary type
|
|
26
|
+
BCF_DT_CTG = 1
|
|
27
|
+
BCF_DT_SAMPLE = 2
|
|
28
|
+
|
|
29
|
+
BCF_BT_NULL = 0
|
|
30
|
+
BCF_BT_INT8 = 1
|
|
31
|
+
BCF_BT_INT16 = 2
|
|
32
|
+
BCF_BT_INT32 = 3
|
|
33
|
+
BCF_BT_INT64 = 4 # Unofficial, for internal use only.
|
|
34
|
+
BCF_BT_FLOAT = 5
|
|
35
|
+
BCF_BT_CHAR = 7
|
|
36
|
+
|
|
37
|
+
VCF_REF = 0
|
|
38
|
+
VCF_SNP = 1
|
|
39
|
+
VCF_MNP = 2
|
|
40
|
+
VCF_INDEL = 4
|
|
41
|
+
VCF_OTHER = 8
|
|
42
|
+
VCF_BND = 16 # breakend
|
|
43
|
+
VCF_OVERLAP = 32 # overlapping deletion, ALT=*
|
|
44
|
+
|
|
45
|
+
BCF1_DIRTY_ID = 1
|
|
46
|
+
BCF1_DIRTY_ALS = 2
|
|
47
|
+
BCF1_DIRTY_FLT = 4
|
|
48
|
+
BCF1_DIRTY_INF = 8
|
|
49
|
+
|
|
50
|
+
BCF_ERR_CTG_UNDEF = 1
|
|
51
|
+
BCF_ERR_TAG_UNDEF = 2
|
|
52
|
+
BCF_ERR_NCOLS = 4
|
|
53
|
+
BCF_ERR_LIMITS = 8
|
|
54
|
+
BCF_ERR_CHAR = 16
|
|
55
|
+
BCF_ERR_CTG_INVALID = 32
|
|
56
|
+
BCF_ERR_TAG_INVALID = 64
|
|
57
|
+
|
|
58
|
+
# macros
|
|
59
|
+
class << self
|
|
60
|
+
def bcf_hdr_nsamples(hdr)
|
|
61
|
+
hdr[:n][BCF_DT_SAMPLE]
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# constants
|
|
66
|
+
BCF_UN_STR = 1 # up to ALT inclusive
|
|
67
|
+
BCF_UN_FLT = 2 # up to FILTER
|
|
68
|
+
BCF_UN_INFO = 4 # up to INFO
|
|
69
|
+
BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
|
|
70
|
+
BCF_UN_FMT = 8 # unpack format and each sample
|
|
71
|
+
BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
|
|
72
|
+
BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
|
|
73
|
+
|
|
5
74
|
attach_function \
|
|
6
75
|
:bcf_hdr_init,
|
|
7
76
|
[:string],
|
|
@@ -14,7 +83,7 @@ module HTS
|
|
|
14
83
|
|
|
15
84
|
attach_function \
|
|
16
85
|
:bcf_init,
|
|
17
|
-
[
|
|
86
|
+
[],
|
|
18
87
|
Bcf1.by_ref
|
|
19
88
|
|
|
20
89
|
attach_function \
|
|
@@ -54,7 +123,7 @@ module HTS
|
|
|
54
123
|
|
|
55
124
|
attach_function \
|
|
56
125
|
:vcf_parse,
|
|
57
|
-
[
|
|
126
|
+
[KString, BcfHdr, Bcf1],
|
|
58
127
|
:int
|
|
59
128
|
|
|
60
129
|
attach_function \
|
|
@@ -64,7 +133,7 @@ module HTS
|
|
|
64
133
|
|
|
65
134
|
attach_function \
|
|
66
135
|
:vcf_format,
|
|
67
|
-
[BcfHdr, Bcf1,
|
|
136
|
+
[BcfHdr, Bcf1, KString],
|
|
68
137
|
:int
|
|
69
138
|
|
|
70
139
|
attach_function \
|
|
@@ -119,7 +188,7 @@ module HTS
|
|
|
119
188
|
|
|
120
189
|
attach_function \
|
|
121
190
|
:vcf_write_line,
|
|
122
|
-
[HtsFile,
|
|
191
|
+
[HtsFile, KString],
|
|
123
192
|
:int
|
|
124
193
|
|
|
125
194
|
attach_function \
|
|
@@ -144,7 +213,7 @@ module HTS
|
|
|
144
213
|
|
|
145
214
|
attach_function \
|
|
146
215
|
:bcf_hdr_format,
|
|
147
|
-
[BcfHdr, :int,
|
|
216
|
+
[BcfHdr, :int, KString],
|
|
148
217
|
:int
|
|
149
218
|
|
|
150
219
|
attach_function \
|
|
@@ -204,7 +273,7 @@ module HTS
|
|
|
204
273
|
|
|
205
274
|
attach_function \
|
|
206
275
|
:bcf_hrec_format,
|
|
207
|
-
[BcfHrec,
|
|
276
|
+
[BcfHrec, KString],
|
|
208
277
|
:int
|
|
209
278
|
|
|
210
279
|
attach_function \
|
|
@@ -369,27 +438,27 @@ module HTS
|
|
|
369
438
|
|
|
370
439
|
attach_function \
|
|
371
440
|
:bcf_fmt_array,
|
|
372
|
-
[
|
|
441
|
+
[KString, :int, :int, :pointer],
|
|
373
442
|
:int
|
|
374
443
|
|
|
375
444
|
attach_function \
|
|
376
445
|
:bcf_fmt_sized_array,
|
|
377
|
-
[
|
|
446
|
+
[KString, :pointer],
|
|
378
447
|
:uint8_t
|
|
379
448
|
|
|
380
449
|
attach_function \
|
|
381
450
|
:bcf_enc_vchar,
|
|
382
|
-
[
|
|
451
|
+
[KString, :int, :string],
|
|
383
452
|
:int
|
|
384
453
|
|
|
385
454
|
attach_function \
|
|
386
455
|
:bcf_enc_vint,
|
|
387
|
-
[
|
|
456
|
+
[KString, :int, :pointer, :int],
|
|
388
457
|
:int
|
|
389
458
|
|
|
390
459
|
attach_function \
|
|
391
460
|
:bcf_enc_vfloat,
|
|
392
|
-
[
|
|
461
|
+
[KString, :int, :pointer],
|
|
393
462
|
:int
|
|
394
463
|
|
|
395
464
|
attach_function \
|
data/lib/hts/vcf.rb
CHANGED
|
@@ -3,27 +3,37 @@
|
|
|
3
3
|
# Based on hts-python
|
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
|
5
5
|
|
|
6
|
+
require_relative 'vcf/header'
|
|
7
|
+
require_relative 'vcf/variant'
|
|
8
|
+
|
|
6
9
|
module HTS
|
|
7
10
|
class VCF
|
|
8
|
-
|
|
11
|
+
include Enumerable
|
|
12
|
+
attr_reader :file_path, :mode, :header, :htf
|
|
9
13
|
|
|
10
|
-
def
|
|
14
|
+
def initialize(file_path, mode = 'r')
|
|
15
|
+
@file_path = File.expand_path(file_path)
|
|
16
|
+
File.exist?(@file_path) || raise("No such VCF/BCF file - #{@file_path}")
|
|
11
17
|
|
|
12
|
-
|
|
18
|
+
@mode = mode
|
|
19
|
+
@htf = FFI.hts_open(@file_path, mode)
|
|
13
20
|
|
|
14
|
-
|
|
21
|
+
@header = VCF::Header.new(FFI.bcf_hdr_read(@htf))
|
|
15
22
|
|
|
16
|
-
|
|
17
|
-
|
|
23
|
+
@c = FFI.bcf_init
|
|
24
|
+
end
|
|
18
25
|
|
|
19
|
-
|
|
20
|
-
def initialize; end
|
|
26
|
+
# def inspect; end
|
|
21
27
|
|
|
22
|
-
def
|
|
28
|
+
def each(&block)
|
|
29
|
+
block.call(Variant.new(@c, self)) while FFI.bcf_read(@htf, @header.h, @c) != -1
|
|
30
|
+
end
|
|
23
31
|
|
|
24
|
-
def
|
|
32
|
+
def seq(tid); end
|
|
25
33
|
|
|
26
|
-
def
|
|
34
|
+
def n_samples
|
|
35
|
+
FFI.bcf_hdr_nsamples(header.h)
|
|
36
|
+
end
|
|
27
37
|
end
|
|
28
38
|
|
|
29
39
|
class Format
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class VCF
|
|
5
|
+
class Header
|
|
6
|
+
attr_reader :h
|
|
7
|
+
|
|
8
|
+
def initialize(h)
|
|
9
|
+
@h = h
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# FIXME: better name?
|
|
13
|
+
def seqs
|
|
14
|
+
Array.new(@h[:n_targets]) do |i|
|
|
15
|
+
FFI.sam_hdr_tid2name(@h, i)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def text
|
|
20
|
+
FFI.sam_hdr_str(@h)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTS
|
|
4
|
+
class VCF
|
|
5
|
+
class Variant
|
|
6
|
+
def initialize(bcf_t, vcf)
|
|
7
|
+
@c = bcf_t
|
|
8
|
+
FFI.bcf_unpack(@c, HTS::FFI::BCF_UN_ALL) # FIXME
|
|
9
|
+
@vcf = vcf
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# def inspect; end
|
|
13
|
+
|
|
14
|
+
def formats; end
|
|
15
|
+
|
|
16
|
+
def genotypes; end
|
|
17
|
+
|
|
18
|
+
def pos
|
|
19
|
+
@c[:pos] + 1 # FIXME
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def start
|
|
23
|
+
@c[:pos]
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def stop
|
|
27
|
+
@c[:pos] + @c[:rlen]
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def id
|
|
31
|
+
@c[:d][:id]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def qual
|
|
35
|
+
@c[:qual]
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def ref
|
|
39
|
+
@c[:d][:allele].get_pointer(0).read_string
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
data/lib/hts/version.rb
CHANGED
data/lib/htslib.rb
CHANGED
|
@@ -9,15 +9,33 @@ module HTS
|
|
|
9
9
|
|
|
10
10
|
class << self
|
|
11
11
|
attr_accessor :ffi_lib
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
suffix = ::FFI::Platform::LIBSUFFIX
|
|
15
12
|
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
def search_htslib(name = nil)
|
|
14
|
+
name ||= "libhts.#{::FFI::Platform::LIBSUFFIX}"
|
|
15
|
+
lib_path = if ENV['HTSLIBDIR']
|
|
16
|
+
File.expand_path(name, ENV['HTSLIBDIR'])
|
|
18
17
|
else
|
|
19
|
-
File.expand_path("../vendor
|
|
18
|
+
File.expand_path("../vendor/#{name}", __dir__)
|
|
20
19
|
end
|
|
20
|
+
return lib_path if File.exist?(lib_path)
|
|
21
|
+
|
|
22
|
+
begin
|
|
23
|
+
require 'pkg-config'
|
|
24
|
+
lib_dir = PKGConfig.variable('htslib', 'libdir')
|
|
25
|
+
lib_path = File.expand_path(name, lib_dir)
|
|
26
|
+
rescue PackageConfig::NotFoundError
|
|
27
|
+
warn "htslib.pc was not found in the pkg-config search path."
|
|
28
|
+
end
|
|
29
|
+
return lib_path if File.exist?(lib_path)
|
|
30
|
+
|
|
31
|
+
warn "htslib shared library '#{name}' not found."
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
self.ffi_lib = search_htslib
|
|
36
|
+
|
|
37
|
+
# You can change the path of the shared library with `HTS.ffi_lib=`
|
|
38
|
+
# before calling the FFI module.
|
|
21
39
|
autoload :FFI, 'hts/ffi'
|
|
22
40
|
end
|
|
23
41
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: htslib
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-06-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: ffi
|
|
@@ -25,13 +25,13 @@ dependencies:
|
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '0'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
28
|
+
name: pkg-config
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - ">="
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
33
|
version: '0'
|
|
34
|
-
type: :
|
|
34
|
+
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
@@ -39,7 +39,7 @@ dependencies:
|
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '0'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
|
-
name:
|
|
42
|
+
name: bundler
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
45
|
- - ">="
|
|
@@ -53,7 +53,7 @@ dependencies:
|
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
|
-
name:
|
|
56
|
+
name: minitest
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
59
|
- - ">="
|
|
@@ -108,7 +108,7 @@ dependencies:
|
|
|
108
108
|
- - ">="
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '0'
|
|
111
|
-
description:
|
|
111
|
+
description:
|
|
112
112
|
email:
|
|
113
113
|
- 2xijok@gmail.com
|
|
114
114
|
executables: []
|
|
@@ -134,13 +134,15 @@ files:
|
|
|
134
134
|
- lib/hts/ffi/vcf.rb
|
|
135
135
|
- lib/hts/tbx.rb
|
|
136
136
|
- lib/hts/vcf.rb
|
|
137
|
+
- lib/hts/vcf/header.rb
|
|
138
|
+
- lib/hts/vcf/variant.rb
|
|
137
139
|
- lib/hts/version.rb
|
|
138
140
|
- lib/htslib.rb
|
|
139
141
|
homepage: https://github.com/kojix2/ruby-htslib
|
|
140
142
|
licenses:
|
|
141
143
|
- MIT
|
|
142
144
|
metadata: {}
|
|
143
|
-
post_install_message:
|
|
145
|
+
post_install_message:
|
|
144
146
|
rdoc_options: []
|
|
145
147
|
require_paths:
|
|
146
148
|
- lib
|
|
@@ -155,8 +157,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
155
157
|
- !ruby/object:Gem::Version
|
|
156
158
|
version: '0'
|
|
157
159
|
requirements: []
|
|
158
|
-
rubygems_version: 3.
|
|
159
|
-
signing_key:
|
|
160
|
+
rubygems_version: 3.2.15
|
|
161
|
+
signing_key:
|
|
160
162
|
specification_version: 4
|
|
161
163
|
summary: HTSlib bindings for Ruby
|
|
162
164
|
test_files: []
|