htslib 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -10
- data/lib/hts/bam.rb +8 -8
- data/lib/hts/bam/alignment.rb +0 -1
- data/lib/hts/fai.rb +43 -4
- data/lib/hts/ffi.rb +46 -4
- data/lib/hts/ffi/bgzf.rb +1 -1
- data/lib/hts/ffi/constants.rb +30 -41
- data/lib/hts/ffi/hfile.rb +1 -1
- data/lib/hts/ffi/hts.rb +2 -2
- data/lib/hts/ffi/sam.rb +39 -8
- data/lib/hts/ffi/vcf.rb +80 -11
- data/lib/hts/vcf.rb +21 -11
- data/lib/hts/vcf/header.rb +24 -0
- data/lib/hts/vcf/variant.rb +43 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +24 -6
- metadata +13 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4fe8c4a8f710ee3b35793e997f394ba8f80e0fe5b507768de2af2c5ab1297a0
|
4
|
+
data.tar.gz: 1bb06ced745342f4de8665f046304364c9d419ae05eee355c3cd852d6b2b454a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c7fb677d2462a1ddf4cd146d9f9962b570f39762e98d893d77f89bbbf61c92329ed8391030f772bf4f98fb3dc874edc3c365dd2d85266e1e966b24effc1715c
|
7
|
+
data.tar.gz: fee9ec647e3ef51e83a1aec1c62ac02c796dcfefe4d5fd6c901e4b04e4253619e7337cee651df1ece9ca58c74a7a50620befd5cd8ab823696d0f893a437bd562
|
data/README.md
CHANGED
@@ -4,12 +4,19 @@
|
|
4
4
|
![CI](https://github.com/kojix2/ruby-htslib/workflows/CI/badge.svg)
|
5
5
|
[![The MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE.txt)
|
6
6
|
[![DOI](https://zenodo.org/badge/247078205.svg)](https://zenodo.org/badge/latestdoi/247078205)
|
7
|
+
[![Docs Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/htslib)
|
7
8
|
|
8
9
|
:dna: [HTSlib](https://github.com/samtools/htslib) - high-throughput sequencing data manipulation - for Ruby
|
9
10
|
|
10
11
|
:apple: Feel free to fork it if you would like to develop it!
|
11
12
|
|
12
|
-
:bowtie: Just a prototype.
|
13
|
+
:bowtie: Just a prototype. Pre-alpha stage.
|
14
|
+
|
15
|
+
## Requirements
|
16
|
+
|
17
|
+
* [htslib](https://github.com/samtools/htslib)
|
18
|
+
* Ubuntu : `apt install libhts-dev`
|
19
|
+
* macOS : `brew install htslib`
|
13
20
|
|
14
21
|
## Installation
|
15
22
|
|
@@ -17,16 +24,14 @@
|
|
17
24
|
gem install htslib
|
18
25
|
```
|
19
26
|
|
20
|
-
|
27
|
+
If you installed htslib with apt on Ubuntu or Homebrew on macOS, pkg-config will automatically detect the location of the shared library.
|
28
|
+
|
29
|
+
Or you can set the environment variable `HTSLIBDIR`.
|
21
30
|
|
22
31
|
```sh
|
23
|
-
export HTSLIBDIR="/your/path/to/htslib"
|
32
|
+
export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
24
33
|
```
|
25
34
|
|
26
|
-
## Requirements
|
27
|
-
|
28
|
-
* [htslib](https://github.com/samtools/htslib)
|
29
|
-
|
30
35
|
## Usage
|
31
36
|
|
32
37
|
HTS::FFI - Low-level API
|
@@ -42,6 +47,23 @@ p b[:format]
|
|
42
47
|
|
43
48
|
A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) is under development.
|
44
49
|
|
50
|
+
```ruby
|
51
|
+
require 'htslib'
|
52
|
+
|
53
|
+
bam = HTS::Bam.new("a.bam")
|
54
|
+
|
55
|
+
bam.each do |aln|
|
56
|
+
p name: aln.qname,
|
57
|
+
flag: aln.flag,
|
58
|
+
start: aln.start + 1,
|
59
|
+
mpos: aln.mate_pos + 1,
|
60
|
+
mqual: aln.mapping_quality,
|
61
|
+
seq: aln.sequence,
|
62
|
+
cigar: aln.cigar.to_s,
|
63
|
+
qual: aln.base_qualities.map { |i| (i + 33).chr }.join
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
45
67
|
## Documentation
|
46
68
|
|
47
69
|
* [RubyDoc.info - HTSlib](https://rdoc.info/gems/htslib)
|
@@ -51,15 +73,17 @@ A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python
|
|
51
73
|
To get started with development
|
52
74
|
|
53
75
|
```sh
|
54
|
-
git clone --
|
76
|
+
git clone --recursive https://github.com/kojix2/ruby-htslib
|
55
77
|
cd ruby-htslib
|
56
78
|
bundle install
|
57
|
-
bundle exec rake htslib:
|
58
|
-
bundle exec rake
|
79
|
+
bundle exec rake htslib:build
|
80
|
+
bundle exec rake test
|
59
81
|
```
|
60
82
|
|
61
83
|
## Contributing
|
62
84
|
|
85
|
+
Ruby-htslib is a library under development, so even small improvements such as typo fixes are welcome! Please feel free to send us your pull requests.
|
86
|
+
|
63
87
|
* [Report bugs](https://github.com/kojix2/ruby-htslib/issues)
|
64
88
|
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
65
89
|
* Write, clarify, or fix documentation
|
data/lib/hts/bam.rb
CHANGED
@@ -10,20 +10,20 @@ require_relative 'bam/alignment'
|
|
10
10
|
module HTS
|
11
11
|
class Bam
|
12
12
|
include Enumerable
|
13
|
-
attr_reader :
|
13
|
+
attr_reader :file_path, :mode, :header, :htf
|
14
14
|
|
15
|
-
def initialize(
|
16
|
-
@
|
17
|
-
File.exist?(@
|
15
|
+
def initialize(file_path, mode = 'r', create_index: nil, header: nil, fasta: nil)
|
16
|
+
@file_path = File.expand_path(file_path)
|
17
|
+
File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
|
18
18
|
|
19
19
|
@mode = mode
|
20
|
-
@htf = FFI.hts_open(@
|
20
|
+
@htf = FFI.hts_open(@file_path, mode)
|
21
21
|
|
22
22
|
if mode[0] == 'r'
|
23
|
-
@idx = FFI.sam_index_load(@htf, @
|
23
|
+
@idx = FFI.sam_index_load(@htf, @file_path)
|
24
24
|
if (@idx.null? && create_index.nil?) || create_index
|
25
|
-
FFI.sam_index_build(
|
26
|
-
@idx = FFI.sam_index_load(@htf, @
|
25
|
+
FFI.sam_index_build(file_path, -1)
|
26
|
+
@idx = FFI.sam_index_load(@htf, @file_path)
|
27
27
|
warn 'NO querying'
|
28
28
|
end
|
29
29
|
@header = Bam::Header.new(FFI.sam_hdr_read(@htf))
|
data/lib/hts/bam/alignment.rb
CHANGED
data/lib/hts/fai.rb
CHANGED
@@ -5,13 +5,52 @@
|
|
5
5
|
|
6
6
|
module HTS
|
7
7
|
class Fai
|
8
|
-
|
8
|
+
# FIXME: API
|
9
|
+
def self.open(path)
|
10
|
+
fai = new(path)
|
11
|
+
if block_given?
|
12
|
+
yield(fai)
|
13
|
+
fai.close
|
14
|
+
else
|
15
|
+
fai
|
16
|
+
end
|
17
|
+
end
|
9
18
|
|
10
|
-
|
19
|
+
def initialize(path)
|
20
|
+
@path = File.expand_path(path)
|
21
|
+
@path.delete_suffix!('.fai')
|
22
|
+
FFI.fai_build(@path) unless File.exist?("#{@path}.fai")
|
23
|
+
@fai = FFI.fai_load(@path)
|
24
|
+
raise if @fai.null?
|
11
25
|
|
12
|
-
|
26
|
+
# at_exit{FFI.fai_destroy(@fai)}
|
27
|
+
end
|
13
28
|
|
14
|
-
def
|
29
|
+
def close
|
30
|
+
FFI.fai_destroy(@fai)
|
31
|
+
end
|
32
|
+
|
33
|
+
# the number of sequences in the index.
|
34
|
+
def size
|
35
|
+
FFI.faidx_nseq(@fai)
|
36
|
+
end
|
37
|
+
alias length size
|
38
|
+
|
39
|
+
# return the length of the requested chromosome.
|
40
|
+
def chrom_size(chrom)
|
41
|
+
raise ArgumentError, 'Expect chrom to be String or Symbol' unless chrom.is_a?(String) || chrom.is_a?(Symbol)
|
42
|
+
|
43
|
+
chrom = chrom.to_s
|
44
|
+
result = FFI.faidx_seq_len(@fai, chrom)
|
45
|
+
result == -1 ? nil : result
|
46
|
+
end
|
47
|
+
alias chrom_length chrom_size
|
48
|
+
|
49
|
+
# FIXME: naming and syntax
|
50
|
+
def cget; end
|
51
|
+
|
52
|
+
# FIXME: naming and syntax
|
53
|
+
def get; end
|
15
54
|
|
16
55
|
# __iter__
|
17
56
|
end
|
data/lib/hts/ffi.rb
CHANGED
@@ -20,12 +20,54 @@ end
|
|
20
20
|
|
21
21
|
module FFI
|
22
22
|
class Struct
|
23
|
-
|
24
|
-
|
23
|
+
class << self
|
24
|
+
def union_layout(*args)
|
25
|
+
Class.new(FFI::Union) { layout(*args) }
|
26
|
+
end
|
27
|
+
|
28
|
+
def struct_layout(*args)
|
29
|
+
Class.new(FFI::Struct) { layout(*args) }
|
30
|
+
end
|
25
31
|
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class BitStruct < Struct
|
35
|
+
class << self
|
36
|
+
module BitFieldsModule
|
37
|
+
def [](name)
|
38
|
+
bit_fields = self.class.bit_fields_hash_table
|
39
|
+
parent, start, width = bit_fields[name]
|
40
|
+
if parent
|
41
|
+
(super(parent) >> start) & ((1 << width) - 1)
|
42
|
+
else
|
43
|
+
super(name)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
private_constant :BitFieldsModule
|
48
|
+
|
49
|
+
attr_reader :bit_fields_hash_table
|
50
|
+
|
51
|
+
def bitfields(*args)
|
52
|
+
unless instance_variable_defined?(:@bit_fields_hash_table)
|
53
|
+
@bit_fields_hash_table = {}
|
54
|
+
prepend BitFieldsModule
|
55
|
+
end
|
26
56
|
|
27
|
-
|
28
|
-
|
57
|
+
parent = args.shift
|
58
|
+
labels = []
|
59
|
+
widths = []
|
60
|
+
args.each_slice(2) do |l, w|
|
61
|
+
labels << l
|
62
|
+
widths << w
|
63
|
+
end
|
64
|
+
starts = widths.inject([0]) do |result, w|
|
65
|
+
result << (result.last + w)
|
66
|
+
end
|
67
|
+
labels.zip(starts, widths).each do |l, s, w|
|
68
|
+
@bit_fields_hash_table[l] = [parent, s, w]
|
69
|
+
end
|
70
|
+
end
|
29
71
|
end
|
30
72
|
end
|
31
73
|
end
|
data/lib/hts/ffi/bgzf.rb
CHANGED
data/lib/hts/ffi/constants.rb
CHANGED
@@ -8,13 +8,23 @@ module HTS
|
|
8
8
|
|
9
9
|
# kstring
|
10
10
|
|
11
|
-
class
|
11
|
+
class KString < ::FFI::Struct
|
12
12
|
layout \
|
13
13
|
:l, :size_t,
|
14
14
|
:m, :size_t,
|
15
15
|
:s, :string
|
16
16
|
end
|
17
17
|
|
18
|
+
class KSeq < ::FFI::Struct
|
19
|
+
layout \
|
20
|
+
:name, KString,
|
21
|
+
:comment, KString,
|
22
|
+
:seq, KString,
|
23
|
+
:qual, KString,
|
24
|
+
:last_char, :int,
|
25
|
+
:f, :pointer # FIXME
|
26
|
+
end
|
27
|
+
|
18
28
|
# BGZF
|
19
29
|
class BGZF < ::FFI::Struct
|
20
30
|
layout \
|
@@ -162,7 +172,7 @@ module HTS
|
|
162
172
|
layout \
|
163
173
|
:bitfields, :uint32, # FIXME
|
164
174
|
:lineno, :int64,
|
165
|
-
:line,
|
175
|
+
:line, KString,
|
166
176
|
:fn, :string,
|
167
177
|
:fn_aux, :string,
|
168
178
|
:fp,
|
@@ -259,36 +269,6 @@ module HTS
|
|
259
269
|
class BamMplp < ::FFI::Struct
|
260
270
|
end
|
261
271
|
|
262
|
-
BAM_CMATCH = 0
|
263
|
-
BAM_CINS = 1
|
264
|
-
BAM_CDEL = 2
|
265
|
-
BAM_CREF_SKIP = 3
|
266
|
-
BAM_CSOFT_CLIP = 4
|
267
|
-
BAM_CHARD_CLIP = 5
|
268
|
-
BAM_CPAD = 6
|
269
|
-
BAM_CEQUAL = 7
|
270
|
-
BAM_CDIFF = 8
|
271
|
-
BAM_CBACK = 9
|
272
|
-
|
273
|
-
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
274
|
-
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
275
|
-
BAM_CIGAR_SHIFT = 4
|
276
|
-
BAM_CIGAR_MASK = 0xf
|
277
|
-
BAM_CIGAR_TYPE = 0x3C1A7
|
278
|
-
|
279
|
-
BAM_FPAIRED = 1
|
280
|
-
BAM_FPROPER_PAIR = 2
|
281
|
-
BAM_FUNMAP = 4
|
282
|
-
BAM_FMUNMAP = 8
|
283
|
-
BAM_FREVERSE = 16
|
284
|
-
BAM_FMREVERSE = 32
|
285
|
-
BAM_FREAD1 = 64
|
286
|
-
BAM_FREAD2 = 128
|
287
|
-
BAM_FSECONDARY = 256
|
288
|
-
BAM_FQCFAIL = 512
|
289
|
-
BAM_FDUP = 1024
|
290
|
-
BAM_FSUPPLEMENTARY = 2048
|
291
|
-
|
292
272
|
class TbxConf < ::FFI::Struct
|
293
273
|
layout \
|
294
274
|
:preset, :int32,
|
@@ -386,10 +366,10 @@ module HTS
|
|
386
366
|
:nhrec, :int,
|
387
367
|
:dirty, :int,
|
388
368
|
:ntransl, :int,
|
389
|
-
:transl, :pointer,
|
369
|
+
:transl, [:pointer, 2],
|
390
370
|
:nsamples_ori, :int,
|
391
371
|
:keep_samples, :pointer,
|
392
|
-
:mem,
|
372
|
+
:mem, KString,
|
393
373
|
:m, [:int, 3]
|
394
374
|
end
|
395
375
|
|
@@ -401,9 +381,10 @@ module HTS
|
|
401
381
|
:m_als, :int,
|
402
382
|
:m_allele, :int,
|
403
383
|
:m_flt, :int,
|
384
|
+
:n_flt, :int,
|
404
385
|
:flt, :pointer,
|
405
386
|
:id, :string,
|
406
|
-
:als, :string
|
387
|
+
:als, :pointer, # (\\0-separated string)
|
407
388
|
:allele, :pointer,
|
408
389
|
:info, BcfInfo.ptr,
|
409
390
|
:fmt, BcfFmt.ptr,
|
@@ -414,21 +395,29 @@ module HTS
|
|
414
395
|
:indiv_dirty, :int
|
415
396
|
end
|
416
397
|
|
417
|
-
class Bcf1 < ::FFI::
|
398
|
+
class Bcf1 < ::FFI::BitStruct
|
418
399
|
layout \
|
419
400
|
:pos, :hts_pos_t,
|
420
401
|
:rlen, :hts_pos_t,
|
421
|
-
:rid, :
|
402
|
+
:rid, :int32_t,
|
422
403
|
:qual, :float,
|
423
|
-
:
|
424
|
-
:
|
425
|
-
:shared,
|
426
|
-
:indiv,
|
404
|
+
:n_info_allele, :uint32_t, # FIXME
|
405
|
+
:n_fmt_sample, :uint32_t, # FIXME
|
406
|
+
:shared, KString,
|
407
|
+
:indiv, KString,
|
427
408
|
:d, BcfDec,
|
428
409
|
:max_unpack, :int,
|
429
410
|
:unpacked, :int,
|
430
411
|
:unpack_size, [:int, 3],
|
431
412
|
:errcode, :int
|
413
|
+
|
414
|
+
bitfields :n_info_allele,
|
415
|
+
:n_info, 16,
|
416
|
+
:n_allele, 16
|
417
|
+
|
418
|
+
bitfields :n_fmt_sample,
|
419
|
+
:n_fmt, 8,
|
420
|
+
:n_sample, 24
|
432
421
|
end
|
433
422
|
end
|
434
423
|
end
|
data/lib/hts/ffi/hfile.rb
CHANGED
data/lib/hts/ffi/hts.rb
CHANGED
@@ -8,7 +8,7 @@ module HTS
|
|
8
8
|
|
9
9
|
attach_function \
|
10
10
|
:hts_lib_shutdown,
|
11
|
-
[
|
11
|
+
[],
|
12
12
|
:void
|
13
13
|
|
14
14
|
attach_function \
|
@@ -110,7 +110,7 @@ module HTS
|
|
110
110
|
# Read a line (and its \n or \r\n terminator) from a file
|
111
111
|
attach_function \
|
112
112
|
:hts_getline,
|
113
|
-
[HtsFile, :int,
|
113
|
+
[HtsFile, :int, KString],
|
114
114
|
:int
|
115
115
|
|
116
116
|
attach_function \
|
data/lib/hts/ffi/sam.rb
CHANGED
@@ -2,6 +2,24 @@
|
|
2
2
|
|
3
3
|
module HTS
|
4
4
|
module FFI
|
5
|
+
# constants
|
6
|
+
BAM_CMATCH = 0
|
7
|
+
BAM_CINS = 1
|
8
|
+
BAM_CDEL = 2
|
9
|
+
BAM_CREF_SKIP = 3
|
10
|
+
BAM_CSOFT_CLIP = 4
|
11
|
+
BAM_CHARD_CLIP = 5
|
12
|
+
BAM_CPAD = 6
|
13
|
+
BAM_CEQUAL = 7
|
14
|
+
BAM_CDIFF = 8
|
15
|
+
BAM_CBACK = 9
|
16
|
+
|
17
|
+
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
18
|
+
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
19
|
+
BAM_CIGAR_SHIFT = 4
|
20
|
+
BAM_CIGAR_MASK = 0xf
|
21
|
+
BAM_CIGAR_TYPE = 0x3C1A7
|
22
|
+
|
5
23
|
# macros
|
6
24
|
class << self
|
7
25
|
def bam_cigar_op(c)
|
@@ -13,7 +31,7 @@ module HTS
|
|
13
31
|
end
|
14
32
|
|
15
33
|
def bam_cigar_opchr(c)
|
16
|
-
(BAM_CIGAR_STR
|
34
|
+
("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
|
17
35
|
end
|
18
36
|
|
19
37
|
def bam_cigar_gen(l, o)
|
@@ -25,6 +43,19 @@ module HTS
|
|
25
43
|
end
|
26
44
|
end
|
27
45
|
|
46
|
+
BAM_FPAIRED = 1
|
47
|
+
BAM_FPROPER_PAIR = 2
|
48
|
+
BAM_FUNMAP = 4
|
49
|
+
BAM_FMUNMAP = 8
|
50
|
+
BAM_FREVERSE = 16
|
51
|
+
BAM_FMREVERSE = 32
|
52
|
+
BAM_FREAD1 = 64
|
53
|
+
BAM_FREAD2 = 128
|
54
|
+
BAM_FSECONDARY = 256
|
55
|
+
BAM_FQCFAIL = 512
|
56
|
+
BAM_FDUP = 1024
|
57
|
+
BAM_FSUPPLEMENTARY = 2048
|
58
|
+
|
28
59
|
# macros
|
29
60
|
# function-like macros
|
30
61
|
class << self
|
@@ -148,13 +179,13 @@ module HTS
|
|
148
179
|
# Returns a complete line of formatted text for a given type and ID.
|
149
180
|
attach_function \
|
150
181
|
:sam_hdr_find_line_id,
|
151
|
-
[SamHdr, :string, :string, :string,
|
182
|
+
[SamHdr, :string, :string, :string, KString],
|
152
183
|
:int
|
153
184
|
|
154
185
|
# Returns a complete line of formatted text for a given type and index.
|
155
186
|
attach_function \
|
156
187
|
:sam_hdr_find_line_pos,
|
157
|
-
[SamHdr, :string, :int,
|
188
|
+
[SamHdr, :string, :int, KString],
|
158
189
|
:int
|
159
190
|
|
160
191
|
# Remove a line with given type / id from a header
|
@@ -208,13 +239,13 @@ module HTS
|
|
208
239
|
# Return the value associated with a key for a header line identified by ID_key:ID_val
|
209
240
|
attach_function \
|
210
241
|
:sam_hdr_find_tag_id,
|
211
|
-
[SamHdr, :string, :string, :string, :string,
|
242
|
+
[SamHdr, :string, :string, :string, :string, KString],
|
212
243
|
:int
|
213
244
|
|
214
245
|
# Return the value associated with a key for a header line identified by position
|
215
246
|
attach_function \
|
216
247
|
:sam_hdr_find_tag_pos,
|
217
|
-
[SamHdr, :string, :int, :string,
|
248
|
+
[SamHdr, :string, :int, :string, KString],
|
218
249
|
:int
|
219
250
|
|
220
251
|
# Remove the key from the line identified by type, ID_key and ID_value.
|
@@ -445,12 +476,12 @@ module HTS
|
|
445
476
|
|
446
477
|
attach_function \
|
447
478
|
:sam_parse1,
|
448
|
-
[
|
479
|
+
[KString, SamHdr, Bam1],
|
449
480
|
:int
|
450
481
|
|
451
482
|
attach_function \
|
452
483
|
:sam_format1,
|
453
|
-
[SamHdr, Bam1,
|
484
|
+
[SamHdr, Bam1, KString],
|
454
485
|
:int
|
455
486
|
|
456
487
|
# Read a record from a file
|
@@ -598,7 +629,7 @@ module HTS
|
|
598
629
|
# Get pileup padded insertion sequence
|
599
630
|
attach_function \
|
600
631
|
:bam_plp_insertion,
|
601
|
-
[:pointer,
|
632
|
+
[:pointer, KString, :pointer],
|
602
633
|
:int
|
603
634
|
|
604
635
|
# sets a callback to initialise any per-pileup1_t fields.
|
data/lib/hts/ffi/vcf.rb
CHANGED
@@ -2,6 +2,75 @@
|
|
2
2
|
|
3
3
|
module HTS
|
4
4
|
module FFI
|
5
|
+
# constants
|
6
|
+
BCF_HL_FLT = 0 # header line
|
7
|
+
BCF_HL_INFO = 1
|
8
|
+
BCF_HL_FMT = 2
|
9
|
+
BCF_HL_CTG = 3
|
10
|
+
BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
|
11
|
+
BCF_HL_GEN = 5 # generic header line
|
12
|
+
BCF_HT_FLAG = 0 # header type
|
13
|
+
|
14
|
+
BCF_HT_INT = 1
|
15
|
+
BCF_HT_REAL = 2
|
16
|
+
BCF_HT_STR = 3
|
17
|
+
BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
|
18
|
+
|
19
|
+
BCF_VL_FIXED = 0 # variable length
|
20
|
+
BCF_VL_VAR = 1
|
21
|
+
BCF_VL_A = 2
|
22
|
+
BCF_VL_G = 3
|
23
|
+
BCF_VL_R = 4
|
24
|
+
|
25
|
+
BCF_DT_ID = 0 # dictionary type
|
26
|
+
BCF_DT_CTG = 1
|
27
|
+
BCF_DT_SAMPLE = 2
|
28
|
+
|
29
|
+
BCF_BT_NULL = 0
|
30
|
+
BCF_BT_INT8 = 1
|
31
|
+
BCF_BT_INT16 = 2
|
32
|
+
BCF_BT_INT32 = 3
|
33
|
+
BCF_BT_INT64 = 4 # Unofficial, for internal use only.
|
34
|
+
BCF_BT_FLOAT = 5
|
35
|
+
BCF_BT_CHAR = 7
|
36
|
+
|
37
|
+
VCF_REF = 0
|
38
|
+
VCF_SNP = 1
|
39
|
+
VCF_MNP = 2
|
40
|
+
VCF_INDEL = 4
|
41
|
+
VCF_OTHER = 8
|
42
|
+
VCF_BND = 16 # breakend
|
43
|
+
VCF_OVERLAP = 32 # overlapping deletion, ALT=*
|
44
|
+
|
45
|
+
BCF1_DIRTY_ID = 1
|
46
|
+
BCF1_DIRTY_ALS = 2
|
47
|
+
BCF1_DIRTY_FLT = 4
|
48
|
+
BCF1_DIRTY_INF = 8
|
49
|
+
|
50
|
+
BCF_ERR_CTG_UNDEF = 1
|
51
|
+
BCF_ERR_TAG_UNDEF = 2
|
52
|
+
BCF_ERR_NCOLS = 4
|
53
|
+
BCF_ERR_LIMITS = 8
|
54
|
+
BCF_ERR_CHAR = 16
|
55
|
+
BCF_ERR_CTG_INVALID = 32
|
56
|
+
BCF_ERR_TAG_INVALID = 64
|
57
|
+
|
58
|
+
# macros
|
59
|
+
class << self
|
60
|
+
def bcf_hdr_nsamples(hdr)
|
61
|
+
hdr[:n][BCF_DT_SAMPLE]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# constants
|
66
|
+
BCF_UN_STR = 1 # up to ALT inclusive
|
67
|
+
BCF_UN_FLT = 2 # up to FILTER
|
68
|
+
BCF_UN_INFO = 4 # up to INFO
|
69
|
+
BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
|
70
|
+
BCF_UN_FMT = 8 # unpack format and each sample
|
71
|
+
BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
|
72
|
+
BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
|
73
|
+
|
5
74
|
attach_function \
|
6
75
|
:bcf_hdr_init,
|
7
76
|
[:string],
|
@@ -14,7 +83,7 @@ module HTS
|
|
14
83
|
|
15
84
|
attach_function \
|
16
85
|
:bcf_init,
|
17
|
-
[
|
86
|
+
[],
|
18
87
|
Bcf1.by_ref
|
19
88
|
|
20
89
|
attach_function \
|
@@ -54,7 +123,7 @@ module HTS
|
|
54
123
|
|
55
124
|
attach_function \
|
56
125
|
:vcf_parse,
|
57
|
-
[
|
126
|
+
[KString, BcfHdr, Bcf1],
|
58
127
|
:int
|
59
128
|
|
60
129
|
attach_function \
|
@@ -64,7 +133,7 @@ module HTS
|
|
64
133
|
|
65
134
|
attach_function \
|
66
135
|
:vcf_format,
|
67
|
-
[BcfHdr, Bcf1,
|
136
|
+
[BcfHdr, Bcf1, KString],
|
68
137
|
:int
|
69
138
|
|
70
139
|
attach_function \
|
@@ -119,7 +188,7 @@ module HTS
|
|
119
188
|
|
120
189
|
attach_function \
|
121
190
|
:vcf_write_line,
|
122
|
-
[HtsFile,
|
191
|
+
[HtsFile, KString],
|
123
192
|
:int
|
124
193
|
|
125
194
|
attach_function \
|
@@ -144,7 +213,7 @@ module HTS
|
|
144
213
|
|
145
214
|
attach_function \
|
146
215
|
:bcf_hdr_format,
|
147
|
-
[BcfHdr, :int,
|
216
|
+
[BcfHdr, :int, KString],
|
148
217
|
:int
|
149
218
|
|
150
219
|
attach_function \
|
@@ -204,7 +273,7 @@ module HTS
|
|
204
273
|
|
205
274
|
attach_function \
|
206
275
|
:bcf_hrec_format,
|
207
|
-
[BcfHrec,
|
276
|
+
[BcfHrec, KString],
|
208
277
|
:int
|
209
278
|
|
210
279
|
attach_function \
|
@@ -369,27 +438,27 @@ module HTS
|
|
369
438
|
|
370
439
|
attach_function \
|
371
440
|
:bcf_fmt_array,
|
372
|
-
[
|
441
|
+
[KString, :int, :int, :pointer],
|
373
442
|
:int
|
374
443
|
|
375
444
|
attach_function \
|
376
445
|
:bcf_fmt_sized_array,
|
377
|
-
[
|
446
|
+
[KString, :pointer],
|
378
447
|
:uint8_t
|
379
448
|
|
380
449
|
attach_function \
|
381
450
|
:bcf_enc_vchar,
|
382
|
-
[
|
451
|
+
[KString, :int, :string],
|
383
452
|
:int
|
384
453
|
|
385
454
|
attach_function \
|
386
455
|
:bcf_enc_vint,
|
387
|
-
[
|
456
|
+
[KString, :int, :pointer, :int],
|
388
457
|
:int
|
389
458
|
|
390
459
|
attach_function \
|
391
460
|
:bcf_enc_vfloat,
|
392
|
-
[
|
461
|
+
[KString, :int, :pointer],
|
393
462
|
:int
|
394
463
|
|
395
464
|
attach_function \
|
data/lib/hts/vcf.rb
CHANGED
@@ -3,27 +3,37 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
+
require_relative 'vcf/header'
|
7
|
+
require_relative 'vcf/variant'
|
8
|
+
|
6
9
|
module HTS
|
7
10
|
class VCF
|
8
|
-
|
11
|
+
include Enumerable
|
12
|
+
attr_reader :file_path, :mode, :header, :htf
|
9
13
|
|
10
|
-
def
|
14
|
+
def initialize(file_path, mode = 'r')
|
15
|
+
@file_path = File.expand_path(file_path)
|
16
|
+
File.exist?(@file_path) || raise("No such VCF/BCF file - #{@file_path}")
|
11
17
|
|
12
|
-
|
18
|
+
@mode = mode
|
19
|
+
@htf = FFI.hts_open(@file_path, mode)
|
13
20
|
|
14
|
-
|
21
|
+
@header = VCF::Header.new(FFI.bcf_hdr_read(@htf))
|
15
22
|
|
16
|
-
|
17
|
-
|
23
|
+
@c = FFI.bcf_init
|
24
|
+
end
|
18
25
|
|
19
|
-
|
20
|
-
def initialize; end
|
26
|
+
# def inspect; end
|
21
27
|
|
22
|
-
def
|
28
|
+
def each(&block)
|
29
|
+
block.call(Variant.new(@c, self)) while FFI.bcf_read(@htf, @header.h, @c) != -1
|
30
|
+
end
|
23
31
|
|
24
|
-
def
|
32
|
+
def seq(tid); end
|
25
33
|
|
26
|
-
def
|
34
|
+
def n_samples
|
35
|
+
FFI.bcf_hdr_nsamples(header.h)
|
36
|
+
end
|
27
37
|
end
|
28
38
|
|
29
39
|
class Format
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
class VCF
|
5
|
+
class Header
|
6
|
+
attr_reader :h
|
7
|
+
|
8
|
+
def initialize(h)
|
9
|
+
@h = h
|
10
|
+
end
|
11
|
+
|
12
|
+
# FIXME: better name?
|
13
|
+
def seqs
|
14
|
+
Array.new(@h[:n_targets]) do |i|
|
15
|
+
FFI.sam_hdr_tid2name(@h, i)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def text
|
20
|
+
FFI.sam_hdr_str(@h)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTS
|
4
|
+
class VCF
|
5
|
+
class Variant
|
6
|
+
def initialize(bcf_t, vcf)
|
7
|
+
@c = bcf_t
|
8
|
+
FFI.bcf_unpack(@c, HTS::FFI::BCF_UN_ALL) # FIXME
|
9
|
+
@vcf = vcf
|
10
|
+
end
|
11
|
+
|
12
|
+
# def inspect; end
|
13
|
+
|
14
|
+
def formats; end
|
15
|
+
|
16
|
+
def genotypes; end
|
17
|
+
|
18
|
+
def pos
|
19
|
+
@c[:pos] + 1 # FIXME
|
20
|
+
end
|
21
|
+
|
22
|
+
def start
|
23
|
+
@c[:pos]
|
24
|
+
end
|
25
|
+
|
26
|
+
def stop
|
27
|
+
@c[:pos] + @c[:rlen]
|
28
|
+
end
|
29
|
+
|
30
|
+
def id
|
31
|
+
@c[:d][:id]
|
32
|
+
end
|
33
|
+
|
34
|
+
def qual
|
35
|
+
@c[:qual]
|
36
|
+
end
|
37
|
+
|
38
|
+
def ref
|
39
|
+
@c[:d][:allele].get_pointer(0).read_string
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/hts/version.rb
CHANGED
data/lib/htslib.rb
CHANGED
@@ -9,15 +9,33 @@ module HTS
|
|
9
9
|
|
10
10
|
class << self
|
11
11
|
attr_accessor :ffi_lib
|
12
|
-
end
|
13
|
-
|
14
|
-
suffix = ::FFI::Platform::LIBSUFFIX
|
15
12
|
|
16
|
-
|
17
|
-
|
13
|
+
def search_htslib(name = nil)
|
14
|
+
name ||= "libhts.#{::FFI::Platform::LIBSUFFIX}"
|
15
|
+
lib_path = if ENV['HTSLIBDIR']
|
16
|
+
File.expand_path(name, ENV['HTSLIBDIR'])
|
18
17
|
else
|
19
|
-
File.expand_path("../vendor
|
18
|
+
File.expand_path("../vendor/#{name}", __dir__)
|
20
19
|
end
|
20
|
+
return lib_path if File.exist?(lib_path)
|
21
|
+
|
22
|
+
begin
|
23
|
+
require 'pkg-config'
|
24
|
+
lib_dir = PKGConfig.variable('htslib', 'libdir')
|
25
|
+
lib_path = File.expand_path(name, lib_dir)
|
26
|
+
rescue PackageConfig::NotFoundError
|
27
|
+
warn "htslib.pc was not found in the pkg-config search path."
|
28
|
+
end
|
29
|
+
return lib_path if File.exist?(lib_path)
|
30
|
+
|
31
|
+
warn "htslib shared library '#{name}' not found."
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
self.ffi_lib = search_htslib
|
36
|
+
|
37
|
+
# You can change the path of the shared library with `HTS.ffi_lib=`
|
38
|
+
# before calling the FFI module.
|
21
39
|
autoload :FFI, 'hts/ffi'
|
22
40
|
end
|
23
41
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: htslib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -25,13 +25,13 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: pkg-config
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
|
-
type: :
|
34
|
+
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -53,7 +53,7 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: minitest
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
@@ -108,7 +108,7 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
-
description:
|
111
|
+
description:
|
112
112
|
email:
|
113
113
|
- 2xijok@gmail.com
|
114
114
|
executables: []
|
@@ -134,13 +134,15 @@ files:
|
|
134
134
|
- lib/hts/ffi/vcf.rb
|
135
135
|
- lib/hts/tbx.rb
|
136
136
|
- lib/hts/vcf.rb
|
137
|
+
- lib/hts/vcf/header.rb
|
138
|
+
- lib/hts/vcf/variant.rb
|
137
139
|
- lib/hts/version.rb
|
138
140
|
- lib/htslib.rb
|
139
141
|
homepage: https://github.com/kojix2/ruby-htslib
|
140
142
|
licenses:
|
141
143
|
- MIT
|
142
144
|
metadata: {}
|
143
|
-
post_install_message:
|
145
|
+
post_install_message:
|
144
146
|
rdoc_options: []
|
145
147
|
require_paths:
|
146
148
|
- lib
|
@@ -155,8 +157,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
157
|
- !ruby/object:Gem::Version
|
156
158
|
version: '0'
|
157
159
|
requirements: []
|
158
|
-
rubygems_version: 3.
|
159
|
-
signing_key:
|
160
|
+
rubygems_version: 3.2.15
|
161
|
+
signing_key:
|
160
162
|
specification_version: 4
|
161
163
|
summary: HTSlib bindings for Ruby
|
162
164
|
test_files: []
|