htslib 0.0.1 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +68 -17
- data/lib/hts/bam/cigar.rb +9 -6
- data/lib/hts/bam/flag.rb +93 -0
- data/lib/hts/bam/header.rb +12 -6
- data/lib/hts/bam/record.rb +195 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +52 -0
- data/lib/hts/bcf/header.rb +19 -0
- data/lib/hts/bcf/info.rb +93 -0
- data/lib/hts/bcf/record.rb +110 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +2 -2
- data/lib/hts/{ffi → libhts}/constants.rb +144 -76
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +2 -2
- data/lib/hts/{ffi → libhts}/hts.rb +9 -3
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/{ffi → libhts}/sam.rb +60 -30
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/{ffi → libhts}/vcf.rb +215 -12
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +32 -17
- metadata +49 -28
- data/lib/hts/bam/alignment.rb +0 -156
- data/lib/hts/fai.rb +0 -18
- data/lib/hts/ffi.rb +0 -43
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb6fbb228a9fa0642c55cdfa39d5189b311df191725954ae46b8dda135dc8c7
|
4
|
+
data.tar.gz: c90ad39aa4919cefa56e534a706bf9659bba9c6ad69eb374491092ce16ae93f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 392ea7c6acb5b86e9e1c1a29f066049c54954edfd277be30541aee433a7349f9d74df09e663ec0269f30d6b87ff47d66a4a668470d3aa536e25a2058cf963546
|
7
|
+
data.tar.gz: 9cc235f8876b8fd4339593eb84a0f2cb8b605b9c82b624cd0f97691b4109a672f55ae90447bba17fb5efcb8257279ad8c2d87234bca390b1624c7aa8f9a7014e
|
data/README.md
CHANGED
@@ -1,15 +1,26 @@
|
|
1
|
-
#
|
1
|
+
# ruby-htslib
|
2
2
|
|
3
3
|
[](https://badge.fury.io/rb/htslib)
|
4
4
|

|
5
5
|
[](LICENSE.txt)
|
6
6
|
[](https://zenodo.org/badge/latestdoi/247078205)
|
7
|
+
[](https://rubydoc.info/gems/htslib)
|
7
8
|
|
8
|
-
:dna: [HTSlib](https://github.com/samtools/htslib) -
|
9
|
+
:dna: [HTSlib](https://github.com/samtools/htslib) - for Ruby
|
10
|
+
|
11
|
+
Ruby-htslib provides Ruby bindings to HTSlib, a C library for processing high-throughput sequencing (HTS) data.
|
12
|
+
It will provide APIs to read and write file formats such as SAM, BAM, VCF, and BCF.
|
9
13
|
|
10
14
|
:apple: Feel free to fork it out if you can develop it!
|
11
15
|
|
12
|
-
:bowtie:
|
16
|
+
:bowtie: alpha stage.
|
17
|
+
## Requirements
|
18
|
+
|
19
|
+
* [Ruby](https://github.com/ruby/ruby) 2.7 or above.
|
20
|
+
* [HTSlib](https://github.com/samtools/htslib)
|
21
|
+
* Ubuntu : `apt install libhts-dev`
|
22
|
+
* macOS : `brew install htslib`
|
23
|
+
* Build from source code (see Development section)
|
13
24
|
|
14
25
|
## Installation
|
15
26
|
|
@@ -17,30 +28,53 @@
|
|
17
28
|
gem install htslib
|
18
29
|
```
|
19
30
|
|
20
|
-
|
31
|
+
If you have installed htslib with apt on Ubuntu or homebrew on Mac, [pkg-config](https://github.com/ruby-gnome/pkg-config)
|
32
|
+
will automatically detect the location of the shared library.
|
33
|
+
Alternatively, you can specify the directory of the shared library by setting the environment variable `HTSLIBDIR`.
|
21
34
|
|
22
35
|
```sh
|
23
|
-
export HTSLIBDIR="/your/path/to/htslib"
|
36
|
+
export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
24
37
|
```
|
25
38
|
|
26
|
-
##
|
27
|
-
|
28
|
-
* [htslib](https://github.com/samtools/htslib)
|
39
|
+
## Overview
|
29
40
|
|
30
|
-
|
41
|
+
### Low level API
|
31
42
|
|
32
|
-
HTS::
|
43
|
+
`HTS::LibHTS` provides native functions.
|
33
44
|
|
34
45
|
```ruby
|
35
46
|
require 'htslib'
|
36
47
|
|
37
|
-
a = HTS::
|
38
|
-
b = HTS::
|
48
|
+
a = HTS::LibHTS.hts_open("a.bam", "r")
|
49
|
+
b = HTS::LibHTS.hts_get_format(a)
|
39
50
|
p b[:category]
|
40
51
|
p b[:format]
|
41
52
|
```
|
42
53
|
|
43
|
-
|
54
|
+
Note: ruby-htslib does not use FFI managed structs, so you may need to free the memory yourself.
|
55
|
+
|
56
|
+
### High level API (Plan)
|
57
|
+
|
58
|
+
`Cram` `Bam` `Bcf` `Faidx` `Tabix`
|
59
|
+
|
60
|
+
A high-level API is under development. We will change and improve the API to make it better.
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
require 'htslib'
|
64
|
+
|
65
|
+
bam = HTS::Bam.new("a.bam")
|
66
|
+
|
67
|
+
bam.each do |r|
|
68
|
+
p name: r.qname,
|
69
|
+
flag: r.flag,
|
70
|
+
start: r.start + 1,
|
71
|
+
mpos: r.mate_pos + 1,
|
72
|
+
mqual: r.mapping_quality,
|
73
|
+
seq: r.sequence,
|
74
|
+
cigar: r.cigar.to_s,
|
75
|
+
qual: r.base_qualities.map { |i| (i + 33).chr }.join
|
76
|
+
end
|
77
|
+
```
|
44
78
|
|
45
79
|
## Documentation
|
46
80
|
|
@@ -51,25 +85,42 @@ A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python
|
|
51
85
|
To get started with development
|
52
86
|
|
53
87
|
```sh
|
54
|
-
git clone --
|
88
|
+
git clone --recursive https://github.com/kojix2/ruby-htslib
|
55
89
|
cd ruby-htslib
|
56
90
|
bundle install
|
57
|
-
bundle exec rake htslib:
|
58
|
-
bundle exec rake
|
91
|
+
bundle exec rake htslib:build
|
92
|
+
bundle exec rake test
|
59
93
|
```
|
60
94
|
|
95
|
+
We plan to actively use the new features of Ruby. Since the number of users is small, backward compatibility is not important.
|
96
|
+
On the other hand, we will consider compatibility with [Crystal](https://github.com/bio-crystal/htslib.cr) to some extent.
|
97
|
+
|
98
|
+
#### FFI Extensions
|
99
|
+
|
100
|
+
* [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) : Extension of Ruby-FFI to support bitfields.
|
101
|
+
|
102
|
+
#### Automatic generation or automatic validation (Future plan)
|
103
|
+
|
104
|
+
+ [c2ffi](https://github.com/rpav/c2ffi) is a tool to create JSON format metadata from C header files. It is planned to use c2ffi to automatically generate bindings or tests.
|
105
|
+
|
61
106
|
## Contributing
|
62
107
|
|
108
|
+
Ruby-htslib is a library under development, so even small improvements such as typo fixes are welcome! Please feel free to send us your pull requests.
|
109
|
+
|
63
110
|
* [Report bugs](https://github.com/kojix2/ruby-htslib/issues)
|
64
111
|
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
65
112
|
* Write, clarify, or fix documentation
|
66
113
|
* Suggest or add new features
|
114
|
+
* [financial contributions](https://github.com/sponsors/kojix2)
|
67
115
|
|
68
116
|
## Links
|
69
117
|
|
70
118
|
* [samtools/hts-spec](https://github.com/samtools/hts-specs)
|
71
|
-
* [
|
119
|
+
* [bioruby](https://github.com/bioruby/bioruby)
|
120
|
+
|
121
|
+
## Funding support
|
72
122
|
|
123
|
+
This work was partially supported by [Ruby Association Grant 2020](https://www.ruby.or.jp/en/news/20201022).
|
73
124
|
## License
|
74
125
|
|
75
126
|
[MIT License](https://opensource.org/licenses/MIT).
|
data/lib/hts/bam/cigar.rb
CHANGED
@@ -7,22 +7,25 @@ module HTS
|
|
7
7
|
class Bam
|
8
8
|
# Enumerable view over the CIGAR operations of one alignment.
#
# Holds the pointer and the operation count given to the constructor and
# decodes each packed uint32 through LibHTS.bam_cigar_oplen /
# LibHTS.bam_cigar_opchr.
class Cigar
  include Enumerable

  # @param pointer pointer-like object indexable with [] whose elements
  #   respond to #read_uint32
  # @param n_cigar [Integer] number of CIGAR operations to decode
  def initialize(pointer, n_cigar)
    @pointer = pointer
    @n_cigar = n_cigar
  end

  # @return the pointer passed to the constructor
  def to_ptr
    @pointer
  end

  # @return [String] all operations concatenated as length + op character
  #   (for example "10M2I")
  def to_s
    to_a.flatten.join
  end

  # Yields one [length, op_character] pair per CIGAR operation.
  #
  # Without a block an Enumerator is returned instead of raising
  # LocalJumpError, so `cigar.each.with_index` and friends work.
  #
  # @yieldparam pair [Array] [oplen, opchr] as returned by LibHTS
  # @return [Integer, Enumerator] n_cigar when a block is given
  def each
    return to_enum(__method__) { @n_cigar } unless block_given?

    @n_cigar.times do |i|
      # NOTE(review): assumes @pointer[i] advances by one uint32 per index;
      # that depends on the pointer's element size - confirm in LibHTS.
      c = @pointer[i].read_uint32
      yield [LibHTS.bam_cigar_oplen(c),
             LibHTS.bam_cigar_opchr(c)]
    end
  end
end
|
data/lib/hts/bam/flag.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-nim
|
4
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
|
5
|
+
|
6
|
+
module HTS
  class Bam
    # Wraps the integer FLAG field of an alignment record and exposes one
    # predicate per SAM flag bit.
    class Flag
      # @param flag_value [Integer] raw FLAG value
      # @raise [TypeError] if flag_value is not an Integer
      def initialize(flag_value)
        raise TypeError unless flag_value.is_a? Integer

        @value = flag_value
      end

      # Raw integer FLAG value.
      attr_accessor :value

      # BAM_FPAIRED        =    1
      # BAM_FPROPER_PAIR   =    2
      # BAM_FUNMAP         =    4
      # BAM_FMUNMAP        =    8
      # BAM_FREVERSE       =   16
      # BAM_FMREVERSE      =   32
      # BAM_FREAD1         =   64
      # BAM_FREAD2         =  128
      # BAM_FSECONDARY     =  256
      # BAM_FQCFAIL        =  512
      # BAM_FDUP           = 1024
      # BAM_FSUPPLEMENTARY = 2048

      # TODO: Enabling bitwise operations
      # hts-nim
      # proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
      # proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
      # proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
      # proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
      # proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
      # proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
      # proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}

      # @return [Boolean] true when the BAM_FPAIRED bit is set
      def paired?
        has_flag? LibHTS::BAM_FPAIRED
      end

      # @return [Boolean] true when the BAM_FPROPER_PAIR bit is set
      def proper_pair?
        has_flag? LibHTS::BAM_FPROPER_PAIR
      end

      # @return [Boolean] true when the BAM_FUNMAP bit is set
      def unmapped?
        has_flag? LibHTS::BAM_FUNMAP
      end

      # @return [Boolean] true when the BAM_FMUNMAP bit is set
      def mate_unmapped?
        has_flag? LibHTS::BAM_FMUNMAP
      end

      # @return [Boolean] true when the BAM_FREVERSE bit is set
      def reverse?
        has_flag? LibHTS::BAM_FREVERSE
      end

      # @return [Boolean] true when the BAM_FMREVERSE bit is set
      def mate_reverse?
        has_flag? LibHTS::BAM_FMREVERSE
      end

      # @return [Boolean] true when the BAM_FREAD1 bit is set
      def read1?
        has_flag? LibHTS::BAM_FREAD1
      end

      # @return [Boolean] true when the BAM_FREAD2 bit is set
      def read2?
        has_flag? LibHTS::BAM_FREAD2
      end

      # @return [Boolean] true when the BAM_FSECONDARY bit is set
      def secondary?
        has_flag? LibHTS::BAM_FSECONDARY
      end

      # @return [Boolean] true when the BAM_FQCFAIL bit is set
      def qcfail?
        has_flag? LibHTS::BAM_FQCFAIL
      end

      # @return [Boolean] true when the BAM_FDUP bit is set
      def dup?
        has_flag? LibHTS::BAM_FDUP
      end

      # @return [Boolean] true when the BAM_FSUPPLEMENTARY bit is set
      def supplementary?
        has_flag? LibHTS::BAM_FSUPPLEMENTARY
      end

      # Tests whether any bit of the mask +o+ is set in the flag value.
      #
      # The BAM_F* constants are bit masks (1, 2, 4, ...), so they must be
      # AND-ed with the value. Integer#[] takes a bit *index* and would test
      # the wrong bit (e.g. mask 1 would look at bit 1, i.e. value 2).
      #
      # @param o [Integer] bit mask, normally one of the LibHTS::BAM_F* constants
      # @return [Boolean]
      def has_flag?(o)
        (@value & o) != 0
      end
    end
  end
end
|
data/lib/hts/bam/header.rb
CHANGED
@@ -6,21 +6,27 @@
|
|
6
6
|
module HTS
|
7
7
|
class Bam
|
8
8
|
# Thin wrapper around the SAM header struct handed to the constructor.
class Header
  # @param pointer the header struct; it is indexed with [:n_targets] and
  #   passed as-is to the LibHTS.sam_hdr_* functions
  def initialize(pointer)
    @sam_hdr = pointer
  end

  # @return the wrapped header struct
  def struct
    @sam_hdr
  end

  # @return the result of calling #to_ptr on the wrapped struct
  def to_ptr
    struct.to_ptr
  end

  # FIXME: better name?
  # Looks up the name of every target id from 0 up to n_targets - 1.
  #
  # @return [Array] one LibHTS.sam_hdr_tid2name result per target
  def seqs
    target_count = @sam_hdr[:n_targets]
    Array.new(target_count) { |tid| LibHTS.sam_hdr_tid2name(@sam_hdr, tid) }
  end

  # @return the header text as returned by LibHTS.sam_hdr_str
  def text
    LibHTS.sam_hdr_str(@sam_hdr)
  end
end
|
26
32
|
end
|
@@ -0,0 +1,195 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
module HTS
  class Bam
    # One alignment record: a bam1_t struct paired with the header object
    # supplied at construction time.
    class Record
      # Lookup table from the 4-bit code returned by LibHTS.bam_seqi to a base character.
      SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"

      attr_reader :header

      # @param bam1_t the bam1_t struct backing this record
      # @param header the header object (responds to #struct)
      def initialize(bam1_t, header)
        @bam1 = bam1_t
        @header = header
      end

      # @return the wrapped bam1_t struct
      def struct
        @bam1
      end

      # @return the result of #to_ptr on the wrapped struct
      def to_ptr
        struct.to_ptr
      end

      # def initialize_copy
      #   super
      # end

      # Not implemented yet (empty stub).
      # NOTE(review): the name looks like a typo of `from_sam_str` - confirm before relying on it.
      def self.rom_sam_str; end

      # Not implemented yet (empty stub).
      def tags; end

      # returns the query name.
      def qname
        name_ptr = LibHTS.bam_get_qname(@bam1)
        name_ptr.read_string
      end

      # Set (query) name.
      # def qname=(name)
      #   raise 'Not Implemented'
      # end

      # returns the tid of the record or -1 if not mapped.
      def tid
        core[:tid]
      end

      # returns the tid of the mate or -1 if not mapped.
      def mate_tid
        core[:mtid]
      end

      # returns 0-based start position.
      def start
        core[:pos]
      end

      # returns end position of the read.
      def stop
        LibHTS.bam_endpos(@bam1)
      end

      # returns 0-based mate position
      def mate_start
        core[:mpos]
      end
      alias mate_pos mate_start

      # returns the chromosome or '' if not mapped.
      def chrom
        target = core[:tid]
        target == -1 ? "" : LibHTS.sam_hdr_tid2name(@header, target)
      end

      # returns the chromosome of the mate or '' if not mapped.
      def mate_chrom
        target = core[:mtid]
        target == -1 ? "" : LibHTS.sam_hdr_tid2name(@header, target)
      end

      # returns "-" when LibHTS.bam_is_rev is truthy, "+" otherwise.
      # NOTE(review): relies on bam_is_rev returning true/false rather than
      # an Integer (0 is truthy in Ruby) - confirm in LibHTS.
      def strand
        if LibHTS.bam_is_rev(@bam1)
          "-"
        else
          "+"
        end
      end

      # def start=(v)
      #   raise 'Not Implemented'
      # end

      # insert size
      def isize
        core[:isize]
      end

      # mapping quality
      def mapping_quality
        core[:qual]
      end

      # returns a `Cigar` object.
      def cigar
        cigar_ptr = LibHTS.bam_get_cigar(@bam1)
        Cigar.new(cigar_ptr, core[:n_cigar])
      end

      # query length as computed by LibHTS.bam_cigar2qlen
      def qlen
        op_count = core[:n_cigar]
        LibHTS.bam_cigar2qlen(op_count, LibHTS.bam_get_cigar(@bam1))
      end

      # reference length as computed by LibHTS.bam_cigar2rlen
      def rlen
        op_count = core[:n_cigar]
        LibHTS.bam_cigar2rlen(op_count, LibHTS.bam_get_cigar(@bam1))
      end

      # return the read sequence
      def sequence
        packed = LibHTS.bam_get_seq(@bam1)
        (0...core[:l_qseq]).each_with_object(String.new) do |i, bases|
          bases << SEQ_NT16_STR[LibHTS.bam_seqi(packed, i)]
        end
      end

      # return only the base of the requested index "i" of the query sequence.
      # Negative indices count from the end; "." is returned when out of range.
      def base_at(n)
        n += core[:l_qseq] if n < 0
        in_range = !((n >= core[:l_qseq]) || (n < 0)) # eg. base_at(-1000)
        return "." unless in_range

        packed = LibHTS.bam_get_seq(@bam1)
        SEQ_NT16_STR[LibHTS.bam_seqi(packed, n)]
      end

      # return the base qualities
      def base_qualities
        LibHTS.bam_get_qual(@bam1).read_array_of_uint8(core[:l_qseq])
      end

      # return only the base quality of the requested index "i" of the query sequence.
      # Negative indices count from the end; 0 is returned when out of range.
      def base_quality_at(n)
        n += core[:l_qseq] if n < 0
        in_range = !((n >= core[:l_qseq]) || (n < 0)) # eg. base_quality_at(-1000)
        return 0 unless in_range

        LibHTS.bam_get_qual(@bam1).get_uint8(n)
      end

      # flag rendered by LibHTS.bam_flag2str
      def flag_str
        LibHTS.bam_flag2str(core[:flag])
      end

      # returns a `Flag` object.
      def flag
        Flag.new(core[:flag])
      end

      # Looks up an auxiliary tag and converts it according to its
      # one-character type code. Returns nil when the tag is absent or the
      # type code is not one of those handled below.
      def tag(str)
        aux = LibHTS.bam_aux_get(@bam1, str)
        return nil if aux.null?

        type_code = aux.read_string(1)
        if %w[i I c C s S].include?(type_code)
          LibHTS.bam_aux2i(aux)
        elsif %w[f d].include?(type_code)
          LibHTS.bam_aux2f(aux)
        elsif %w[Z H].include?(type_code)
          LibHTS.bam_aux2Z(aux)
        elsif type_code == "A"
          LibHTS.bam_aux2A(aux)
        end
      end

      # Formats the record through LibHTS.sam_format1 and returns the :s
      # field of the resulting KString.
      def to_s
        kstr = LibHTS::KString.new
        status = LibHTS.sam_format1(@header.struct, @bam1, kstr)
        raise "Failed to format bam record" if status == -1

        kstr[:s]
      end

      # TODO:
      # def eql?
      # def hash

      private

      # The :core member of the wrapped bam1_t struct.
      def core
        @bam1[:core]
      end
    end
  end
end
|
data/lib/hts/bam.rb
CHANGED
@@ -3,76 +3,111 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
-
require_relative
|
7
|
-
require_relative
|
8
|
-
require_relative
|
6
|
+
require_relative "bam/header"
|
7
|
+
require_relative "bam/cigar"
|
8
|
+
require_relative "bam/flag"
|
9
|
+
require_relative "bam/record"
|
9
10
|
|
10
11
|
module HTS
|
11
12
|
# Reader for SAM/BAM files built on the LibHTS bindings.
class Bam
  include Enumerable

  attr_reader :file_path, :mode, :header
  # HtfFile is FFI::BitStruct
  attr_reader :htf_file

  class << self
    alias open new
  end

  # Opens +file_path+, reads its header and loads (or builds) its index.
  # When a block is given the object is yielded and closed afterwards.
  #
  # @param file_path [String] path to the SAM/BAM file (expanded with File.expand_path)
  # @param mode [String] open mode; only modes starting with "r" are supported
  # @param create_index [Boolean, nil] true forces an index build; nil builds
  #   one only when loading the index returned a null pointer; false never builds
  # @raise [RuntimeError] if the file does not exist or the mode is not a read mode
  def initialize(file_path, mode = "r", create_index: nil)
    file_path = File.expand_path(file_path)

    unless File.exist?(file_path)
      message = "No such SAM/BAM file - #{file_path}"
      raise message
    end

    # FIXME: implement
    # Reject unsupported modes before opening anything, so no file handle
    # or header is left open when we raise.
    raise "not implemented yet." unless mode[0] == "r"

    @file_path = file_path
    @mode = mode
    @htf_file = LibHTS.hts_open(file_path, mode)
    @header = Bam::Header.new(LibHTS.sam_hdr_read(htf_file))

    # load index
    @idx = LibHTS.sam_index_load(htf_file, file_path)
    # create index
    if create_index || (@idx.null? && create_index.nil?)
      warn "Create index for #{file_path}"
      LibHTS.sam_index_build(file_path, -1)
      @idx = LibHTS.sam_index_load(htf_file, file_path)
    end

    # IO like API
    return unless block_given?

    begin
      yield self
    ensure
      close
    end
  end

  # @return the underlying hts file struct
  def struct
    htf_file
  end

  # @return the result of #to_ptr on the underlying hts file struct
  def to_ptr
    htf_file.to_ptr
  end

  # Writes every record of +alns+ with LibHTS.sam_write1.
  #
  # Each element is passed to sam_write1 individually (previously the
  # collection itself was dereferenced on every iteration).
  # NOTE(review): initialize currently rejects write modes, so this path
  # is not reachable through the public constructor yet.
  #
  # @param alns [#each] records to write
  # @raise [RuntimeError] when sam_write1 reports a negative status
  def write(alns)
    alns.each do |aln|
      raise "Failed to write record" if LibHTS.sam_write1(htf_file, header, aln) < 0
    end
  end

  # Close the current file.
  def close
    LibHTS.hts_close(htf_file)
  end

  # Flush the current file.
  def flush
    # LibHTS.bgzf_flush(@htf_file.fp.bgzf)
  end

  # Yields a Record for every remaining alignment in the file.
  #
  # @return [self, Enumerator] an Enumerator when no block is given
  def each
    # Each does not always start at the beginning of the file.
    # This is the common behavior of IO objects in Ruby.
    # This may change in the future.
    return to_enum(__method__) unless block_given?

    # A fresh bam1 is allocated per record because the yielded Record keeps it.
    # htslib documents ">= 0" as success for sam_read1, so 0 is a valid record.
    while LibHTS.sam_read1(htf_file, header, (bam1 = LibHTS.bam_init1)) >= 0
      yield Record.new(bam1, header)
    end
    self
  end

  # query [WIP]
  # Yields a Record for every alignment overlapping +region+.
  #
  # @param region [String] region string passed to LibHTS.sam_itr_querys
  # @return [nil, Enumerator] an Enumerator when no block is given
  def query(region)
    return to_enum(__method__, region) unless block_given?

    qiter = LibHTS.sam_itr_querys(@idx, header, region)
    begin
      bam1 = LibHTS.bam_init1
      # ">= 0" is the documented success range of the iterator as well.
      while LibHTS.sam_itr_next(htf_file, qiter, bam1) >= 0
        yield Record.new(bam1, header)
        bam1 = LibHTS.bam_init1
      end
    ensure
      LibHTS.hts_itr_destroy(qiter)
    end
  end
end
|