htslib 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -16
- data/lib/hts/bam.rb +38 -32
- data/lib/hts/bam/cigar.rb +3 -3
- data/lib/hts/bam/flag.rb +91 -0
- data/lib/hts/bam/header.rb +2 -2
- data/lib/hts/bam/{alignment.rb → record.rb} +49 -43
- data/lib/hts/fai.rb +8 -8
- data/lib/hts/libhts.rb +141 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +114 -43
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +7 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +25 -25
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +1 -1
- data/lib/hts/vcf.rb +17 -17
- data/lib/hts/vcf/format.rb +24 -0
- data/lib/hts/vcf/header.rb +2 -2
- data/lib/hts/vcf/info.rb +24 -0
- data/lib/hts/vcf/{variant.rb → record.rb} +2 -2
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +20 -17
- data/lib/hts/ffi.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30f42b474bc317136d665b00781fbfcb11caaf588b091e76bc86bf9cdf8d5e3f
|
4
|
+
data.tar.gz: d48e5f74fb0efed4de5af2955b0093d1eb7ef5ee7767bc7ee50bf3e296e7ce28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c1bf27a8fdc04a4a9ba678923df5bb579439c286802a5d1f2a4e6f11d7102217eafa0e4e42c2fa853e9ee82c706756315a0a1d6f97c5b5fab58ee909add4eb0
|
7
|
+
data.tar.gz: 59219371057e45cf31951eda2dae250acaedd12c593c09fb08f19720818be514797c57e9b45be8e1f6ea60f2fbc79fe6038a1aced6ba66d8f39a544eb6516a0a
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ruby-htslib
|
2
2
|
|
3
3
|
[](https://badge.fury.io/rb/htslib)
|
4
4
|

|
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
:apple: Feel free to fork it if you would like to develop it!
|
12
12
|
|
13
|
-
:bowtie:
|
13
|
+
:bowtie: alpha stage.
|
14
14
|
|
15
15
|
## Requirements
|
16
16
|
|
@@ -24,7 +24,7 @@
|
|
24
24
|
gem install htslib
|
25
25
|
```
|
26
26
|
|
27
|
-
If you installed htslib with Ubuntu/apt or Mac/homebrew, pkg-config will automatically detect the location of the shared library.
|
27
|
+
If you installed htslib with Ubuntu/apt or Mac/homebrew, [pkg-config](https://github.com/ruby-gnome/pkg-config) will automatically detect the location of the shared library.
|
28
28
|
|
29
29
|
Or you can set the environment variable `HTSLIBDIR`.
|
30
30
|
|
@@ -34,33 +34,39 @@ export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
|
34
34
|
|
35
35
|
## Usage
|
36
36
|
|
37
|
-
|
37
|
+
### Low level API
|
38
|
+
|
39
|
+
HTS::LibHTS
|
38
40
|
|
39
41
|
```ruby
|
40
42
|
require 'htslib'
|
41
43
|
|
42
|
-
a = HTS::
|
43
|
-
b = HTS::
|
44
|
+
a = HTS::LibHTS.hts_open("a.bam", "r")
|
45
|
+
b = HTS::LibHTS.hts_get_format(a)
|
44
46
|
p b[:category]
|
45
47
|
p b[:format]
|
46
48
|
```
|
47
49
|
|
48
|
-
|
50
|
+
Note: Managed structs are not used in ruby-htslib. You may need to free the memory yourself.
|
51
|
+
|
52
|
+
### High level API
|
53
|
+
|
54
|
+
A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) or [hts-nim](https://github.com/brentp/hts-nim) is under development. We will change and improve the API to make it better.
|
49
55
|
|
50
56
|
```ruby
|
51
57
|
require 'htslib'
|
52
58
|
|
53
59
|
bam = HTS::Bam.new("a.bam")
|
54
60
|
|
55
|
-
bam.each do |
|
56
|
-
p name:
|
57
|
-
flag:
|
58
|
-
start:
|
59
|
-
mpos:
|
60
|
-
mqual:
|
61
|
-
seq:
|
62
|
-
cigar:
|
63
|
-
qual:
|
61
|
+
bam.each do |r|
|
62
|
+
p name: r.qname,
|
63
|
+
flag: r.flag,
|
64
|
+
start: r.start + 1,
|
65
|
+
mpos: r.mate_pos + 1,
|
66
|
+
mqual: r.mapping_quality,
|
67
|
+
seq: r.sequence,
|
68
|
+
cigar: r.cigar.to_s,
|
69
|
+
qual: r.base_qualities.map { |i| (i + 33).chr }.join
|
64
70
|
end
|
65
71
|
```
|
66
72
|
|
@@ -80,6 +86,9 @@ bundle exec rake htslib:build
|
|
80
86
|
bundle exec rake test
|
81
87
|
```
|
82
88
|
|
89
|
+
[c2ffi](https://github.com/rpav/c2ffi) :
|
90
|
+
I am trying to find a way to automatically generate a low-level API using c2ffi.
|
91
|
+
|
83
92
|
## Contributing
|
84
93
|
|
85
94
|
Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
|
data/lib/hts/bam.rb
CHANGED
@@ -3,76 +3,82 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
-
require_relative
|
7
|
-
require_relative
|
8
|
-
require_relative
|
6
|
+
require_relative "bam/header"
|
7
|
+
require_relative "bam/cigar"
|
8
|
+
require_relative "bam/flag"
|
9
|
+
require_relative "bam/record"
|
9
10
|
|
10
11
|
module HTS
|
11
12
|
class Bam
|
12
13
|
include Enumerable
|
13
|
-
attr_reader :file_path, :mode, :
|
14
|
+
attr_reader :file_path, :mode, :htf, :header
|
14
15
|
|
15
|
-
def initialize(file_path, mode =
|
16
|
-
|
17
|
-
File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
|
16
|
+
def initialize(file_path, mode = "r", create_index: nil)
|
17
|
+
file_path = File.expand_path(file_path)
|
18
18
|
|
19
|
-
|
20
|
-
@htf = FFI.hts_open(@file_path, mode)
|
19
|
+
raise("No such SAM/BAM file - #{file_path}") unless File.exist?(file_path)
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
@header = Bam::Header.new(FFI.sam_hdr_read(@htf))
|
30
|
-
@b = FFI.bam_init1
|
21
|
+
@file_path = file_path
|
22
|
+
@mode = mode
|
23
|
+
@htf = LibHTS.hts_open(@file_path, mode)
|
24
|
+
@header = Bam::Header.new(LibHTS.sam_hdr_read(htf))
|
25
|
+
# FIXME: should be defined here?
|
26
|
+
@b = LibHTS.bam_init1
|
31
27
|
|
28
|
+
# read
|
29
|
+
if mode[0] == "r"
|
30
|
+
# load index
|
31
|
+
@idx = LibHTS.sam_index_load(htf, file_path)
|
32
|
+
# create index
|
33
|
+
if create_index || (@idx.null? && create_index.nil?)
|
34
|
+
warn "Create index for #{file_path}"
|
35
|
+
LibHTS.sam_index_build(file_path, -1)
|
36
|
+
@idx = LibHTS.sam_index_load(@htf, @file_path)
|
37
|
+
end
|
32
38
|
else
|
33
|
-
# FIXME
|
34
|
-
raise
|
35
|
-
|
39
|
+
# FIXME: implement
|
40
|
+
raise "not implemented yet."
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
39
|
-
def self.header_from_fasta; end
|
40
|
-
|
41
44
|
def write(alns)
|
42
45
|
alns.each do
|
43
|
-
|
46
|
+
LibHTS.sam_write1(htf, header, alns.b) > 0 || raise
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
47
50
|
# Close the current file.
|
48
51
|
def close
|
49
|
-
|
52
|
+
LibHTS.hts_close(htf)
|
50
53
|
end
|
51
54
|
|
52
55
|
# Flush the current file.
|
53
56
|
def flush
|
54
57
|
raise
|
55
|
-
#
|
58
|
+
# LibHTS.bgzf_flush(@htf.fp.bgzf)
|
56
59
|
end
|
57
60
|
|
58
61
|
def each(&block)
|
59
62
|
# Each does not always start at the beginning of the file.
|
60
63
|
# This is the common behavior of IO objects in Ruby.
|
61
64
|
# This may change in the future.
|
62
|
-
|
65
|
+
while LibHTS.sam_read1(htf, header.h, @b) > 0
|
66
|
+
record = Record.new(@b, header.h)
|
67
|
+
block.call(record)
|
68
|
+
end
|
63
69
|
end
|
64
70
|
|
65
71
|
# query [WIP]
|
66
72
|
def query(region)
|
67
|
-
qiter =
|
73
|
+
qiter = LibHTS.sam_itr_querys(@idx, header.h, region)
|
68
74
|
begin
|
69
|
-
slen =
|
75
|
+
slen = LibHTS.sam_itr_next(htf, qiter, @b)
|
70
76
|
while slen > 0
|
71
|
-
yield
|
72
|
-
slen =
|
77
|
+
yield Record.new(@b, header.h)
|
78
|
+
slen = LibHTS.sam_itr_next(htf, qiter, @b)
|
73
79
|
end
|
74
80
|
ensure
|
75
|
-
|
81
|
+
LibHTS.hts_itr_destroy(qiter)
|
76
82
|
end
|
77
83
|
end
|
78
84
|
end
|
data/lib/hts/bam/cigar.rb
CHANGED
@@ -7,7 +7,7 @@ module HTS
|
|
7
7
|
class Bam
|
8
8
|
class Cigar
|
9
9
|
include Enumerable
|
10
|
-
OPS =
|
10
|
+
OPS = "MIDNSHP=XB"
|
11
11
|
|
12
12
|
def initialize(cigar, n_cigar)
|
13
13
|
@c = cigar
|
@@ -21,8 +21,8 @@ module HTS
|
|
21
21
|
def each
|
22
22
|
@n_cigar.times do |i|
|
23
23
|
c = @c[i].read_uint32
|
24
|
-
yield [
|
25
|
-
|
24
|
+
yield [LibHTS.bam_cigar_oplen(c),
|
25
|
+
LibHTS.bam_cigar_opchr(c)]
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
data/lib/hts/bam/flag.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-nim
|
4
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
|
5
|
+
|
6
|
+
module HTS
|
7
|
+
class Bam
|
8
|
+
class Flag
|
9
|
+
def initialize(flag_value)
|
10
|
+
@value = flag_value # type check?
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_accessor :value
|
14
|
+
|
15
|
+
# BAM_FPAIRED = 1
|
16
|
+
# BAM_FPROPER_PAIR = 2
|
17
|
+
# BAM_FUNMAP = 4
|
18
|
+
# BAM_FMUNMAP = 8
|
19
|
+
# BAM_FREVERSE = 16
|
20
|
+
# BAM_FMREVERSE = 32
|
21
|
+
# BAM_FREAD1 = 64
|
22
|
+
# BAM_FREAD2 = 128
|
23
|
+
# BAM_FSECONDARY = 256
|
24
|
+
# BAM_FQCFAIL = 512
|
25
|
+
# BAM_FDUP = 1024
|
26
|
+
# BAM_FSUPPLEMENTARY = 2048
|
27
|
+
|
28
|
+
# TODO: Enabling bitwise operations
|
29
|
+
# hts-nim
|
30
|
+
# proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
|
31
|
+
# proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
|
32
|
+
# proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
|
33
|
+
# proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
|
34
|
+
# proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
|
35
|
+
# proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
|
36
|
+
# proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}
|
37
|
+
|
38
|
+
def paired?
|
39
|
+
has_flag? LibHTS::BAM_FPAIRED
|
40
|
+
end
|
41
|
+
|
42
|
+
def proper_pair?
|
43
|
+
has_flag? LibHTS::BAM_FPROPER_PAIR
|
44
|
+
end
|
45
|
+
|
46
|
+
def unmapped?
|
47
|
+
has_flag? LibHTS::BAM_FUNMAP
|
48
|
+
end
|
49
|
+
|
50
|
+
def mate_unmapped?
|
51
|
+
has_flag? LibHTS::BAM_FMUNMAP
|
52
|
+
end
|
53
|
+
|
54
|
+
def reverse?
|
55
|
+
has_flag? LibHTS::BAM_FREVERSE
|
56
|
+
end
|
57
|
+
|
58
|
+
def mate_reverse?
|
59
|
+
has_flag? LibHTS::BAM_FMREVERSE
|
60
|
+
end
|
61
|
+
|
62
|
+
def read1?
|
63
|
+
has_flag? LibHTS::BAM_FREAD1
|
64
|
+
end
|
65
|
+
|
66
|
+
def read2?
|
67
|
+
has_flag? LibHTS::BAM_FREAD2
|
68
|
+
end
|
69
|
+
|
70
|
+
def secondary?
|
71
|
+
has_flag? LibHTS::BAM_FSECONDARY
|
72
|
+
end
|
73
|
+
|
74
|
+
def qcfail?
|
75
|
+
has_flag? LibHTS::BAM_FQCFAIL
|
76
|
+
end
|
77
|
+
|
78
|
+
def dup?
|
79
|
+
has_flag? LibHTS::BAM_FDUP
|
80
|
+
end
|
81
|
+
|
82
|
+
def supplementary?
|
83
|
+
has_flag? LibHTS::BAM_FSUPPLEMENTARY
|
84
|
+
end
|
85
|
+
|
86
|
+
def has_flag?(o)
|
87
|
+
(@value & o) != 0
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/hts/bam/header.rb
CHANGED
@@ -15,12 +15,12 @@ module HTS
|
|
15
15
|
# FIXME: better name?
|
16
16
|
def seqs
|
17
17
|
Array.new(@h[:n_targets]) do |i|
|
18
|
-
|
18
|
+
LibHTS.sam_hdr_tid2name(@h, i)
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
22
|
def text
|
23
|
-
|
23
|
+
LibHTS.sam_hdr_str(@h)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
@@ -5,7 +5,9 @@
|
|
5
5
|
|
6
6
|
module HTS
|
7
7
|
class Bam
|
8
|
-
class
|
8
|
+
class Record
|
9
|
+
SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"
|
10
|
+
|
9
11
|
def initialize(bam1_t, bam_hdr_t)
|
10
12
|
@b = bam1_t
|
11
13
|
@h = bam_hdr_t
|
@@ -19,9 +21,9 @@ module HTS
|
|
19
21
|
|
20
22
|
def tags; end
|
21
23
|
|
22
|
-
#
|
24
|
+
# returns the query name.
|
23
25
|
def qname
|
24
|
-
|
26
|
+
LibHTS.bam_get_qname(@b).read_string
|
25
27
|
end
|
26
28
|
|
27
29
|
# Set (query) name.
|
@@ -29,12 +31,9 @@ module HTS
|
|
29
31
|
# raise 'Not Implemented'
|
30
32
|
# end
|
31
33
|
|
32
|
-
# returns the
|
33
|
-
def
|
34
|
-
|
35
|
-
return '' if tid == -1
|
36
|
-
|
37
|
-
FFI.sam_hdr_tid2name(@h, tid)
|
34
|
+
# returns the tid of the record or -1 if not mapped.
|
35
|
+
def tid
|
36
|
+
@b[:core][:tid]
|
38
37
|
end
|
39
38
|
|
40
39
|
# returns the tid of the mate or -1 if not mapped.
|
@@ -42,16 +41,6 @@ module HTS
|
|
42
41
|
@b[:core][:mtid]
|
43
42
|
end
|
44
43
|
|
45
|
-
# returns the tid of the alignment or -1 if not mapped.
|
46
|
-
def tid
|
47
|
-
@b[:core][:tid]
|
48
|
-
end
|
49
|
-
|
50
|
-
# mate position
|
51
|
-
def mate_pos
|
52
|
-
@b[:core][:mpos]
|
53
|
-
end
|
54
|
-
|
55
44
|
# returns 0-based start position.
|
56
45
|
def start
|
57
46
|
@b[:core][:pos]
|
@@ -59,19 +48,33 @@ module HTS
|
|
59
48
|
|
60
49
|
# returns end position of the read.
|
61
50
|
def stop
|
62
|
-
|
51
|
+
LibHTS.bam_endpos @b
|
52
|
+
end
|
53
|
+
|
54
|
+
# returns 0-based mate position
|
55
|
+
def mate_start
|
56
|
+
@b[:core][:mpos]
|
63
57
|
end
|
58
|
+
alias mate_pos mate_start
|
64
59
|
|
65
60
|
# returns the chromosome or '' if not mapped.
|
66
61
|
def chrom
|
67
62
|
tid = @b[:core][:tid]
|
68
|
-
return
|
63
|
+
return "" if tid == -1
|
64
|
+
|
65
|
+
LibHTS.sam_hdr_tid2name(@h, tid)
|
66
|
+
end
|
67
|
+
|
68
|
+
# returns the chromosome of the mate or '' if not mapped.
|
69
|
+
def mate_chrom
|
70
|
+
tid = @b[:core][:mtid]
|
71
|
+
return "" if tid == -1
|
69
72
|
|
70
|
-
|
73
|
+
LibHTS.sam_hdr_tid2name(@h, tid)
|
71
74
|
end
|
72
75
|
|
73
76
|
def strand
|
74
|
-
|
77
|
+
LibHTS.bam_is_rev(@b) ? "-" : "+"
|
75
78
|
end
|
76
79
|
|
77
80
|
# def start=(v)
|
@@ -90,61 +93,64 @@ module HTS
|
|
90
93
|
|
91
94
|
# returns a `Cigar` object.
|
92
95
|
def cigar
|
93
|
-
Cigar.new(
|
96
|
+
Cigar.new(LibHTS.bam_get_cigar(@b), @b[:core][:n_cigar])
|
94
97
|
end
|
95
98
|
|
96
99
|
def qlen
|
97
|
-
|
100
|
+
LibHTS.bam_cigar2qlen(
|
98
101
|
@b[:core][:n_cigar],
|
99
|
-
|
102
|
+
LibHTS.bam_get_cigar(@b)
|
100
103
|
)
|
101
104
|
end
|
102
105
|
|
103
106
|
def rlen
|
104
|
-
|
107
|
+
LibHTS.bam_cigar2rlen(
|
105
108
|
@b[:core][:n_cigar],
|
106
|
-
|
109
|
+
LibHTS.bam_get_cigar(@b)
|
107
110
|
)
|
108
111
|
end
|
109
112
|
|
110
113
|
# return the read sequence
|
111
114
|
def sequence
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
end
|
115
|
+
r = LibHTS.bam_get_seq(@b)
|
116
|
+
seq = String.new
|
117
|
+
(@b[:core][:l_qseq]).times do |i|
|
118
|
+
seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
|
119
|
+
end
|
120
|
+
seq
|
117
121
|
end
|
118
122
|
|
123
|
+
# return only the base at the requested index "n" of the query sequence.
|
119
124
|
def base_at(n)
|
120
125
|
n += @b[:core][:l_qseq] if n < 0
|
121
|
-
|
122
|
-
return '.' if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
|
126
|
+
return "." if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
|
123
127
|
|
124
|
-
r =
|
125
|
-
|
128
|
+
r = LibHTS.bam_get_seq(@b)
|
129
|
+
SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
|
126
130
|
end
|
127
131
|
|
132
|
+
# return the base qualities
|
128
133
|
def base_qualities
|
129
|
-
q_ptr =
|
134
|
+
q_ptr = LibHTS.bam_get_qual(@b)
|
130
135
|
q_ptr.read_array_of_uint8(@b[:core][:l_qseq])
|
131
136
|
end
|
132
137
|
|
138
|
+
# return only the base quality at the requested index "n" of the query sequence.
|
133
139
|
def base_quality_at(n)
|
134
|
-
n += @b[:core][:l_qseq] if n < 0
|
135
|
-
return 0 if (n >= @b[:core][:l_qseq]) || (n < 0)
|
140
|
+
n += @b[:core][:l_qseq] if n < 0
|
141
|
+
return 0 if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_quality_at(-1000)
|
136
142
|
|
137
|
-
q_ptr =
|
143
|
+
q_ptr = LibHTS.bam_get_qual(@b)
|
138
144
|
q_ptr.get_uint8(n)
|
139
145
|
end
|
140
146
|
|
141
147
|
def flag_str
|
142
|
-
|
148
|
+
LibHTS.bam_flag2str(@b[:core][:flag])
|
143
149
|
end
|
144
150
|
|
145
151
|
# returns a `Flag` object.
|
146
152
|
def flag
|
147
|
-
@b[:core][:flag]
|
153
|
+
Flag.new(@b[:core][:flag])
|
148
154
|
end
|
149
155
|
|
150
156
|
# TODO:
|