htslib 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +25 -16
- data/lib/hts/bam.rb +38 -32
- data/lib/hts/bam/cigar.rb +3 -3
- data/lib/hts/bam/flag.rb +91 -0
- data/lib/hts/bam/header.rb +2 -2
- data/lib/hts/bam/{alignment.rb → record.rb} +49 -43
- data/lib/hts/fai.rb +8 -8
- data/lib/hts/libhts.rb +141 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +114 -43
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +7 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +25 -25
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +1 -1
- data/lib/hts/vcf.rb +17 -17
- data/lib/hts/vcf/format.rb +24 -0
- data/lib/hts/vcf/header.rb +2 -2
- data/lib/hts/vcf/info.rb +24 -0
- data/lib/hts/vcf/{variant.rb → record.rb} +2 -2
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +20 -17
- data/lib/hts/ffi.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30f42b474bc317136d665b00781fbfcb11caaf588b091e76bc86bf9cdf8d5e3f
|
4
|
+
data.tar.gz: d48e5f74fb0efed4de5af2955b0093d1eb7ef5ee7767bc7ee50bf3e296e7ce28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c1bf27a8fdc04a4a9ba678923df5bb579439c286802a5d1f2a4e6f11d7102217eafa0e4e42c2fa853e9ee82c706756315a0a1d6f97c5b5fab58ee909add4eb0
|
7
|
+
data.tar.gz: 59219371057e45cf31951eda2dae250acaedd12c593c09fb08f19720818be514797c57e9b45be8e1f6ea60f2fbc79fe6038a1aced6ba66d8f39a544eb6516a0a
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ruby-htslib
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/htslib.svg)](https://badge.fury.io/rb/htslib)
|
4
4
|
![CI](https://github.com/kojix2/ruby-htslib/workflows/CI/badge.svg)
|
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
:apple: Feel free to fork it if you would like to develop it!
|
12
12
|
|
13
|
-
:bowtie:
|
13
|
+
:bowtie: alpha stage.
|
14
14
|
|
15
15
|
## Requirements
|
16
16
|
|
@@ -24,7 +24,7 @@
|
|
24
24
|
gem install htslib
|
25
25
|
```
|
26
26
|
|
27
|
-
If you installed htslib with Ubuntu/apt or Mac/homebrew, pkg-config will automatically detect the location of the shared library.
|
27
|
+
If you installed htslib with Ubuntu/apt or Mac/homebrew, [pkg-config](https://github.com/ruby-gnome/pkg-config) will automatically detect the location of the shared library.
|
28
28
|
|
29
29
|
Or you can set the environment variable `HTSLIBDIR`.
|
30
30
|
|
@@ -34,33 +34,39 @@ export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
|
34
34
|
|
35
35
|
## Usage
|
36
36
|
|
37
|
-
|
37
|
+
### Low level API
|
38
|
+
|
39
|
+
HTS::LibHTS
|
38
40
|
|
39
41
|
```ruby
|
40
42
|
require 'htslib'
|
41
43
|
|
42
|
-
a = HTS::
|
43
|
-
b = HTS::
|
44
|
+
a = HTS::LibHTS.hts_open("a.bam", "r")
|
45
|
+
b = HTS::LibHTS.hts_get_format(a)
|
44
46
|
p b[:category]
|
45
47
|
p b[:format]
|
46
48
|
```
|
47
49
|
|
48
|
-
|
50
|
+
Note: Managed structs are not used in ruby-htslib. You may need to free the memory yourself.
|
51
|
+
|
52
|
+
### High level API
|
53
|
+
|
54
|
+
A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) or [hts-nim](https://github.com/brentp/hts-nim) is under development. We will change and improve the API to make it better.
|
49
55
|
|
50
56
|
```ruby
|
51
57
|
require 'htslib'
|
52
58
|
|
53
59
|
bam = HTS::Bam.new("a.bam")
|
54
60
|
|
55
|
-
bam.each do |
|
56
|
-
p name:
|
57
|
-
flag:
|
58
|
-
start:
|
59
|
-
mpos:
|
60
|
-
mqual:
|
61
|
-
seq:
|
62
|
-
cigar:
|
63
|
-
qual:
|
61
|
+
bam.each do |r|
|
62
|
+
p name: r.qname,
|
63
|
+
flag: r.flag,
|
64
|
+
start: r.start + 1,
|
65
|
+
mpos: r.mate_pos + 1,
|
66
|
+
mqual: r.mapping_quality,
|
67
|
+
seq: r.sequence,
|
68
|
+
cigar: r.cigar.to_s,
|
69
|
+
qual: r.base_qualities.map { |i| (i + 33).chr }.join
|
64
70
|
end
|
65
71
|
```
|
66
72
|
|
@@ -80,6 +86,9 @@ bundle exec rake htslib:build
|
|
80
86
|
bundle exec rake test
|
81
87
|
```
|
82
88
|
|
89
|
+
[c2ffi](https://github.com/rpav/c2ffi) :
|
90
|
+
I am trying to find a way to automatically generate a low-level API using c2ffi.
|
91
|
+
|
83
92
|
## Contributing
|
84
93
|
|
85
94
|
Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
|
data/lib/hts/bam.rb
CHANGED
@@ -3,76 +3,82 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
-
require_relative
|
7
|
-
require_relative
|
8
|
-
require_relative
|
6
|
+
require_relative "bam/header"
|
7
|
+
require_relative "bam/cigar"
|
8
|
+
require_relative "bam/flag"
|
9
|
+
require_relative "bam/record"
|
9
10
|
|
10
11
|
module HTS
|
11
12
|
class Bam
|
12
13
|
include Enumerable
|
13
|
-
attr_reader :file_path, :mode, :
|
14
|
+
attr_reader :file_path, :mode, :htf, :header
|
14
15
|
|
15
|
-
def initialize(file_path, mode =
|
16
|
-
|
17
|
-
File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
|
16
|
+
def initialize(file_path, mode = "r", create_index: nil)
|
17
|
+
file_path = File.expand_path(file_path)
|
18
18
|
|
19
|
-
|
20
|
-
@htf = FFI.hts_open(@file_path, mode)
|
19
|
+
raise("No such SAM/BAM file - #{file_path}") unless File.exist?(file_path)
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
@header = Bam::Header.new(FFI.sam_hdr_read(@htf))
|
30
|
-
@b = FFI.bam_init1
|
21
|
+
@file_path = file_path
|
22
|
+
@mode = mode
|
23
|
+
@htf = LibHTS.hts_open(@file_path, mode)
|
24
|
+
@header = Bam::Header.new(LibHTS.sam_hdr_read(htf))
|
25
|
+
# FIXME: should be defined here?
|
26
|
+
@b = LibHTS.bam_init1
|
31
27
|
|
28
|
+
# read
|
29
|
+
if mode[0] == "r"
|
30
|
+
# load index
|
31
|
+
@idx = LibHTS.sam_index_load(htf, file_path)
|
32
|
+
# create index
|
33
|
+
if create_index || (@idx.null? && create_index.nil?)
|
34
|
+
warn "Create index for #{file_path}"
|
35
|
+
LibHTS.sam_index_build(file_path, -1)
|
36
|
+
@idx = LibHTS.sam_index_load(@htf, @file_path)
|
37
|
+
end
|
32
38
|
else
|
33
|
-
# FIXME
|
34
|
-
raise
|
35
|
-
|
39
|
+
# FIXME: implement
|
40
|
+
raise "not implemented yet."
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
39
|
-
def self.header_from_fasta; end
|
40
|
-
|
41
44
|
def write(alns)
|
42
45
|
alns.each do
|
43
|
-
|
46
|
+
LibHTS.sam_write1(htf, header, alns.b) > 0 || raise
|
44
47
|
end
|
45
48
|
end
|
46
49
|
|
47
50
|
# Close the current file.
|
48
51
|
def close
|
49
|
-
|
52
|
+
LibHTS.hts_close(htf)
|
50
53
|
end
|
51
54
|
|
52
55
|
# Flush the current file.
|
53
56
|
def flush
|
54
57
|
raise
|
55
|
-
#
|
58
|
+
# LibHTS.bgzf_flush(@htf.fp.bgzf)
|
56
59
|
end
|
57
60
|
|
58
61
|
def each(&block)
|
59
62
|
# Each does not always start at the beginning of the file.
|
60
63
|
# This is the common behavior of IO objects in Ruby.
|
61
64
|
# This may change in the future.
|
62
|
-
|
65
|
+
while LibHTS.sam_read1(htf, header.h, @b) > 0
|
66
|
+
record = Record.new(@b, header.h)
|
67
|
+
block.call(record)
|
68
|
+
end
|
63
69
|
end
|
64
70
|
|
65
71
|
# query [WIP]
|
66
72
|
def query(region)
|
67
|
-
qiter =
|
73
|
+
qiter = LibHTS.sam_itr_querys(@idx, header.h, region)
|
68
74
|
begin
|
69
|
-
slen =
|
75
|
+
slen = LibHTS.sam_itr_next(htf, qiter, @b)
|
70
76
|
while slen > 0
|
71
|
-
yield
|
72
|
-
slen =
|
77
|
+
yield Record.new(@b, header.h)
|
78
|
+
slen = LibHTS.sam_itr_next(htf, qiter, @b)
|
73
79
|
end
|
74
80
|
ensure
|
75
|
-
|
81
|
+
LibHTS.hts_itr_destroy(qiter)
|
76
82
|
end
|
77
83
|
end
|
78
84
|
end
|
data/lib/hts/bam/cigar.rb
CHANGED
@@ -7,7 +7,7 @@ module HTS
|
|
7
7
|
class Bam
|
8
8
|
class Cigar
|
9
9
|
include Enumerable
|
10
|
-
OPS =
|
10
|
+
OPS = "MIDNSHP=XB"
|
11
11
|
|
12
12
|
def initialize(cigar, n_cigar)
|
13
13
|
@c = cigar
|
@@ -21,8 +21,8 @@ module HTS
|
|
21
21
|
def each
|
22
22
|
@n_cigar.times do |i|
|
23
23
|
c = @c[i].read_uint32
|
24
|
-
yield [
|
25
|
-
|
24
|
+
yield [LibHTS.bam_cigar_oplen(c),
|
25
|
+
LibHTS.bam_cigar_opchr(c)]
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
data/lib/hts/bam/flag.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-nim
|
4
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
|
5
|
+
|
6
|
+
module HTS
|
7
|
+
class Bam
|
8
|
+
class Flag
|
9
|
+
def initialize(flag_value)
|
10
|
+
@value = flag_value # type check?
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_accessor :value
|
14
|
+
|
15
|
+
# BAM_FPAIRED = 1
|
16
|
+
# BAM_FPROPER_PAIR = 2
|
17
|
+
# BAM_FUNMAP = 4
|
18
|
+
# BAM_FMUNMAP = 8
|
19
|
+
# BAM_FREVERSE = 16
|
20
|
+
# BAM_FMREVERSE = 32
|
21
|
+
# BAM_FREAD1 = 64
|
22
|
+
# BAM_FREAD2 = 128
|
23
|
+
# BAM_FSECONDARY = 256
|
24
|
+
# BAM_FQCFAIL = 512
|
25
|
+
# BAM_FDUP = 1024
|
26
|
+
# BAM_FSUPPLEMENTARY = 2048
|
27
|
+
|
28
|
+
# TODO: Enable bitwise operations
|
29
|
+
# hts-nim
|
30
|
+
# proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
|
31
|
+
# proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
|
32
|
+
# proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
|
33
|
+
# proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
|
34
|
+
# proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
|
35
|
+
# proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
|
36
|
+
# proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}
|
37
|
+
|
38
|
+
def paired?
|
39
|
+
has_flag? LibHTS::BAM_FPAIRED
|
40
|
+
end
|
41
|
+
|
42
|
+
def proper_pair?
|
43
|
+
has_flag? LibHTS::BAM_FPROPER_PAIR
|
44
|
+
end
|
45
|
+
|
46
|
+
def unmapped?
|
47
|
+
has_flag? LibHTS::BAM_FUNMAP
|
48
|
+
end
|
49
|
+
|
50
|
+
def mate_unmapped?
|
51
|
+
has_flag? LibHTS::BAM_FMUNMAP
|
52
|
+
end
|
53
|
+
|
54
|
+
def reverse?
|
55
|
+
has_flag? LibHTS::BAM_FREVERSE
|
56
|
+
end
|
57
|
+
|
58
|
+
def mate_reverse?
|
59
|
+
has_flag? LibHTS::BAM_FMREVERSE
|
60
|
+
end
|
61
|
+
|
62
|
+
def read1?
|
63
|
+
has_flag? LibHTS::BAM_FREAD1
|
64
|
+
end
|
65
|
+
|
66
|
+
def read2?
|
67
|
+
has_flag? LibHTS::BAM_FREAD2
|
68
|
+
end
|
69
|
+
|
70
|
+
def secondary?
|
71
|
+
has_flag? LibHTS::BAM_FSECONDARY
|
72
|
+
end
|
73
|
+
|
74
|
+
def qcfail?
|
75
|
+
has_flag? LibHTS::BAM_FQCFAIL
|
76
|
+
end
|
77
|
+
|
78
|
+
def dup?
|
79
|
+
has_flag? LibHTS::BAM_FDUP
|
80
|
+
end
|
81
|
+
|
82
|
+
def supplementary?
|
83
|
+
has_flag? LibHTS::BAM_FSUPPLEMENTARY
|
84
|
+
end
|
85
|
+
|
86
|
+
def has_flag?(o)
|
87
|
+
(@value & o) != 0
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/hts/bam/header.rb
CHANGED
@@ -15,12 +15,12 @@ module HTS
|
|
15
15
|
# FIXME: better name?
|
16
16
|
def seqs
|
17
17
|
Array.new(@h[:n_targets]) do |i|
|
18
|
-
|
18
|
+
LibHTS.sam_hdr_tid2name(@h, i)
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
22
|
def text
|
23
|
-
|
23
|
+
LibHTS.sam_hdr_str(@h)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
@@ -5,7 +5,9 @@
|
|
5
5
|
|
6
6
|
module HTS
|
7
7
|
class Bam
|
8
|
-
class
|
8
|
+
class Record
|
9
|
+
SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"
|
10
|
+
|
9
11
|
def initialize(bam1_t, bam_hdr_t)
|
10
12
|
@b = bam1_t
|
11
13
|
@h = bam_hdr_t
|
@@ -19,9 +21,9 @@ module HTS
|
|
19
21
|
|
20
22
|
def tags; end
|
21
23
|
|
22
|
-
#
|
24
|
+
# returns the query name.
|
23
25
|
def qname
|
24
|
-
|
26
|
+
LibHTS.bam_get_qname(@b).read_string
|
25
27
|
end
|
26
28
|
|
27
29
|
# Set (query) name.
|
@@ -29,12 +31,9 @@ module HTS
|
|
29
31
|
# raise 'Not Implemented'
|
30
32
|
# end
|
31
33
|
|
32
|
-
# returns the
|
33
|
-
def
|
34
|
-
|
35
|
-
return '' if tid == -1
|
36
|
-
|
37
|
-
FFI.sam_hdr_tid2name(@h, tid)
|
34
|
+
# returns the tid of the record or -1 if not mapped.
|
35
|
+
def tid
|
36
|
+
@b[:core][:tid]
|
38
37
|
end
|
39
38
|
|
40
39
|
# returns the tid of the mate or -1 if not mapped.
|
@@ -42,16 +41,6 @@ module HTS
|
|
42
41
|
@b[:core][:mtid]
|
43
42
|
end
|
44
43
|
|
45
|
-
# returns the tid of the alignment or -1 if not mapped.
|
46
|
-
def tid
|
47
|
-
@b[:core][:tid]
|
48
|
-
end
|
49
|
-
|
50
|
-
# mate position
|
51
|
-
def mate_pos
|
52
|
-
@b[:core][:mpos]
|
53
|
-
end
|
54
|
-
|
55
44
|
# returns 0-based start position.
|
56
45
|
def start
|
57
46
|
@b[:core][:pos]
|
@@ -59,19 +48,33 @@ module HTS
|
|
59
48
|
|
60
49
|
# returns end position of the read.
|
61
50
|
def stop
|
62
|
-
|
51
|
+
LibHTS.bam_endpos @b
|
52
|
+
end
|
53
|
+
|
54
|
+
# returns 0-based mate position
|
55
|
+
def mate_start
|
56
|
+
@b[:core][:mpos]
|
63
57
|
end
|
58
|
+
alias mate_pos mate_start
|
64
59
|
|
65
60
|
# returns the chromosome or '' if not mapped.
|
66
61
|
def chrom
|
67
62
|
tid = @b[:core][:tid]
|
68
|
-
return
|
63
|
+
return "" if tid == -1
|
64
|
+
|
65
|
+
LibHTS.sam_hdr_tid2name(@h, tid)
|
66
|
+
end
|
67
|
+
|
68
|
+
# returns the chromosome of the mate or '' if not mapped.
|
69
|
+
def mate_chrom
|
70
|
+
tid = @b[:core][:mtid]
|
71
|
+
return "" if tid == -1
|
69
72
|
|
70
|
-
|
73
|
+
LibHTS.sam_hdr_tid2name(@h, tid)
|
71
74
|
end
|
72
75
|
|
73
76
|
def strand
|
74
|
-
|
77
|
+
LibHTS.bam_is_rev(@b) ? "-" : "+"
|
75
78
|
end
|
76
79
|
|
77
80
|
# def start=(v)
|
@@ -90,61 +93,64 @@ module HTS
|
|
90
93
|
|
91
94
|
# returns a `Cigar` object.
|
92
95
|
def cigar
|
93
|
-
Cigar.new(
|
96
|
+
Cigar.new(LibHTS.bam_get_cigar(@b), @b[:core][:n_cigar])
|
94
97
|
end
|
95
98
|
|
96
99
|
def qlen
|
97
|
-
|
100
|
+
LibHTS.bam_cigar2qlen(
|
98
101
|
@b[:core][:n_cigar],
|
99
|
-
|
102
|
+
LibHTS.bam_get_cigar(@b)
|
100
103
|
)
|
101
104
|
end
|
102
105
|
|
103
106
|
def rlen
|
104
|
-
|
107
|
+
LibHTS.bam_cigar2rlen(
|
105
108
|
@b[:core][:n_cigar],
|
106
|
-
|
109
|
+
LibHTS.bam_get_cigar(@b)
|
107
110
|
)
|
108
111
|
end
|
109
112
|
|
110
113
|
# return the read sequence
|
111
114
|
def sequence
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
end
|
115
|
+
r = LibHTS.bam_get_seq(@b)
|
116
|
+
seq = String.new
|
117
|
+
(@b[:core][:l_qseq]).times do |i|
|
118
|
+
seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
|
119
|
+
end
|
120
|
+
seq
|
117
121
|
end
|
118
122
|
|
123
|
+
# return only the base at the requested index "n" of the query sequence.
|
119
124
|
def base_at(n)
|
120
125
|
n += @b[:core][:l_qseq] if n < 0
|
121
|
-
|
122
|
-
return '.' if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
|
126
|
+
return "." if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
|
123
127
|
|
124
|
-
r =
|
125
|
-
|
128
|
+
r = LibHTS.bam_get_seq(@b)
|
129
|
+
SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
|
126
130
|
end
|
127
131
|
|
132
|
+
# return the base qualities
|
128
133
|
def base_qualities
|
129
|
-
q_ptr =
|
134
|
+
q_ptr = LibHTS.bam_get_qual(@b)
|
130
135
|
q_ptr.read_array_of_uint8(@b[:core][:l_qseq])
|
131
136
|
end
|
132
137
|
|
138
|
+
# return only the base quality at the requested index "n" of the query sequence.
|
133
139
|
def base_quality_at(n)
|
134
|
-
n += @b[:core][:l_qseq] if n < 0
|
135
|
-
return 0 if (n >= @b[:core][:l_qseq]) || (n < 0)
|
140
|
+
n += @b[:core][:l_qseq] if n < 0
|
141
|
+
return 0 if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_quality_at(-1000)
|
136
142
|
|
137
|
-
q_ptr =
|
143
|
+
q_ptr = LibHTS.bam_get_qual(@b)
|
138
144
|
q_ptr.get_uint8(n)
|
139
145
|
end
|
140
146
|
|
141
147
|
def flag_str
|
142
|
-
|
148
|
+
LibHTS.bam_flag2str(@b[:core][:flag])
|
143
149
|
end
|
144
150
|
|
145
151
|
# returns a `Flag` object.
|
146
152
|
def flag
|
147
|
-
@b[:core][:flag]
|
153
|
+
Flag.new(@b[:core][:flag])
|
148
154
|
end
|
149
155
|
|
150
156
|
# TODO:
|