htslib 0.0.2 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -22
- data/lib/hts/bam/cigar.rb +11 -6
- data/lib/hts/bam/flag.rb +97 -0
- data/lib/hts/bam/header.rb +17 -7
- data/lib/hts/bam/record.rb +199 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +53 -0
- data/lib/hts/bcf/header.rb +26 -0
- data/lib/hts/bcf/info.rb +94 -0
- data/lib/hts/bcf/record.rb +113 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +126 -47
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +13 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/libhts/sam.rb +102 -0
- data/lib/hts/{ffi/sam.rb → libhts/sam_funcs.rb} +24 -120
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/libhts/vcf.rb +226 -0
- data/lib/hts/{ffi/vcf.rb → libhts/vcf_funcs.rb} +1 -70
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +48 -27
- data/lib/hts/bam/alignment.rb +0 -155
- data/lib/hts/fai.rb +0 -57
- data/lib/hts/ffi.rb +0 -85
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf/header.rb +0 -24
- data/lib/hts/vcf/variant.rb +0 -43
- data/lib/hts/vcf.rb +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f05bc93a621f3d5fb8d06e44c20ac4a3a95c5f6e243df2aebc97e10125fcb779
|
4
|
+
data.tar.gz: 73e747aa0999e54b3c8f01b981b91c4e293167265276a24eac85ea0fd6b85c13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d0f3f8bb9f7a9c063222fd2dc6463f58bdfd784aff39757921dda1c8d417023b024ca9f6c92d7f0eecd30b8064adb1b94966cf962cf9e0e56d67303de0a903b
|
7
|
+
data.tar.gz: 8ef35b7aef04bf2f51ae0ba110d9ffc81c26d7a0c97a821aa1274c04b31ec29515b33c278391494f6a0d8efc5e2d965060ccd190ee0f92a2ec3562fb59c49169
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# ruby-htslib
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/htslib.svg)](https://badge.fury.io/rb/htslib)
|
4
4
|
![CI](https://github.com/kojix2/ruby-htslib/workflows/CI/badge.svg)
|
@@ -6,17 +6,22 @@
|
|
6
6
|
[![DOI](https://zenodo.org/badge/247078205.svg)](https://zenodo.org/badge/latestdoi/247078205)
|
7
7
|
[![Docs Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/htslib)
|
8
8
|
|
9
|
-
:dna: [HTSlib](https://github.com/samtools/htslib) -
|
9
|
+
:dna: [HTSlib](https://github.com/samtools/htslib) - for Ruby
|
10
|
+
|
11
|
+
Ruby-htslib provides Ruby bindings to HTSlib, a C library for processing high-throughput sequencing (HTS) data.
|
12
|
+
It will provide APIs to read and write file formats such as [SAM, BAM, VCF, and BCF](http://samtools.github.io/hts-specs/).
|
10
13
|
|
11
14
|
:apple: Feel free to fork it if you would like to develop it!
|
12
15
|
|
13
|
-
:bowtie:
|
16
|
+
:bowtie: alpha stage.
|
14
17
|
|
15
18
|
## Requirements
|
16
19
|
|
17
|
-
* [
|
20
|
+
* [Ruby](https://github.com/ruby/ruby) 2.7 or above.
|
21
|
+
* [HTSlib](https://github.com/samtools/htslib)
|
18
22
|
* Ubuntu : `apt install libhts-dev`
|
19
23
|
* macOS : `brew install htslib`
|
24
|
+
* Build from source code (see Development section)
|
20
25
|
|
21
26
|
## Installation
|
22
27
|
|
@@ -24,43 +29,51 @@
|
|
24
29
|
gem install htslib
|
25
30
|
```
|
26
31
|
|
27
|
-
If you installed htslib with
|
28
|
-
|
29
|
-
|
32
|
+
If you have installed htslib with apt on Ubuntu or homebrew on Mac, [pkg-config](https://github.com/ruby-gnome/pkg-config)
|
33
|
+
will automatically detect the location of the shared library.
|
34
|
+
Alternatively, you can specify the directory of the shared library by setting the environment variable `HTSLIBDIR`.
|
30
35
|
|
31
36
|
```sh
|
32
37
|
export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
33
38
|
```
|
34
39
|
|
35
|
-
##
|
40
|
+
## Overview
|
41
|
+
|
42
|
+
### Low level API
|
36
43
|
|
37
|
-
HTS::
|
44
|
+
`HTS::LibHTS` provides native functions.
|
38
45
|
|
39
46
|
```ruby
|
40
47
|
require 'htslib'
|
41
48
|
|
42
|
-
a = HTS::
|
43
|
-
b = HTS::
|
49
|
+
a = HTS::LibHTS.hts_open("a.bam", "r")
|
50
|
+
b = HTS::LibHTS.hts_get_format(a)
|
44
51
|
p b[:category]
|
45
52
|
p b[:format]
|
46
53
|
```
|
47
54
|
|
48
|
-
|
55
|
+
Note: ruby-htslib does not use managed structs, so you may need to free the memory yourself.
|
56
|
+
|
57
|
+
### High level API (Plan)
|
58
|
+
|
59
|
+
`Cram` `Bam` `Bcf` `Faidx` `Tabix`
|
60
|
+
|
61
|
+
A high-level API is under development. We will change and improve the API to make it better.
|
49
62
|
|
50
63
|
```ruby
|
51
64
|
require 'htslib'
|
52
65
|
|
53
66
|
bam = HTS::Bam.new("a.bam")
|
54
67
|
|
55
|
-
bam.each do |
|
56
|
-
p name:
|
57
|
-
flag:
|
58
|
-
start:
|
59
|
-
mpos:
|
60
|
-
mqual:
|
61
|
-
seq:
|
62
|
-
cigar:
|
63
|
-
qual:
|
68
|
+
bam.each do |r|
|
69
|
+
p name: r.qname,
|
70
|
+
flag: r.flag,
|
71
|
+
start: r.start + 1,
|
72
|
+
mpos: r.mate_pos + 1,
|
73
|
+
mqual: r.mapping_quality,
|
74
|
+
seq: r.sequence,
|
75
|
+
cigar: r.cigar.to_s,
|
76
|
+
qual: r.base_qualities.map { |i| (i + 33).chr }.join
|
64
77
|
end
|
65
78
|
```
|
66
79
|
|
@@ -80,6 +93,17 @@ bundle exec rake htslib:build
|
|
80
93
|
bundle exec rake test
|
81
94
|
```
|
82
95
|
|
96
|
+
* Actively use the advanced features of Ruby.
|
97
|
+
* Consider compatibility with [htslib.cr](https://github.com/bio-crystal/htslib.cr) to some extent.
|
98
|
+
|
99
|
+
#### FFI Extensions
|
100
|
+
|
101
|
+
* [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) : Extension of Ruby-FFI to support bitfields.
|
102
|
+
|
103
|
+
#### Automatic generation or automatic validation (Future plan)
|
104
|
+
|
105
|
+
+ [c2ffi](https://github.com/rpav/c2ffi) is a tool to create JSON format metadata from C header files. It is planned to use c2ffi to automatically generate bindings or tests.
|
106
|
+
|
83
107
|
## Contributing
|
84
108
|
|
85
109
|
Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
|
@@ -88,12 +112,16 @@ Ruby-htslib is a library under development, so even small improvements like typo
|
|
88
112
|
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
89
113
|
* Write, clarify, or fix documentation
|
90
114
|
* Suggest or add new features
|
115
|
+
* [financial contributions](https://github.com/sponsors/kojix2)
|
91
116
|
|
92
117
|
## Links
|
93
118
|
|
94
119
|
* [samtools/hts-specs](https://github.com/samtools/hts-specs)
|
95
|
-
* [
|
120
|
+
* [bioruby](https://github.com/bioruby/bioruby)
|
121
|
+
|
122
|
+
## Funding support
|
96
123
|
|
124
|
+
This work was partially supported by [Ruby Association Grant 2020](https://www.ruby.or.jp/en/news/20201022).
|
97
125
|
## License
|
98
126
|
|
99
127
|
[MIT License](https://opensource.org/licenses/MIT).
|
data/lib/hts/bam/cigar.rb
CHANGED
@@ -7,22 +7,27 @@ module HTS
|
|
7
7
|
class Bam
|
8
8
|
class Cigar
|
9
9
|
include Enumerable
|
10
|
-
OPS = 'MIDNSHP=XB'
|
11
10
|
|
12
|
-
def initialize(
|
13
|
-
@
|
11
|
+
def initialize(pointer, n_cigar)
|
12
|
+
@pointer = pointer
|
14
13
|
@n_cigar = n_cigar
|
15
14
|
end
|
16
15
|
|
16
|
+
def to_ptr
|
17
|
+
@pointer
|
18
|
+
end
|
19
|
+
|
17
20
|
def to_s
|
18
21
|
to_a.flatten.join
|
19
22
|
end
|
20
23
|
|
21
24
|
def each
|
25
|
+
return to_enum(__method__) unless block_given?
|
26
|
+
|
22
27
|
@n_cigar.times do |i|
|
23
|
-
c = @
|
24
|
-
yield [
|
25
|
-
|
28
|
+
c = @pointer[i].read_uint32
|
29
|
+
yield [LibHTS.bam_cigar_oplen(c),
|
30
|
+
LibHTS.bam_cigar_opchr(c)]
|
26
31
|
end
|
27
32
|
end
|
28
33
|
end
|
data/lib/hts/bam/flag.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-nim
|
4
|
+
# https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
|
5
|
+
|
6
|
+
module HTS
|
7
|
+
class Bam
|
8
|
+
class Flag
|
9
|
+
def initialize(flag_value)
|
10
|
+
raise TypeError unless flag_value.is_a? Integer
|
11
|
+
|
12
|
+
@value = flag_value
|
13
|
+
end
|
14
|
+
|
15
|
+
attr_accessor :value
|
16
|
+
|
17
|
+
# BAM_FPAIRED = 1
|
18
|
+
# BAM_FPROPER_PAIR = 2
|
19
|
+
# BAM_FUNMAP = 4
|
20
|
+
# BAM_FMUNMAP = 8
|
21
|
+
# BAM_FREVERSE = 16
|
22
|
+
# BAM_FMREVERSE = 32
|
23
|
+
# BAM_FREAD1 = 64
|
24
|
+
# BAM_FREAD2 = 128
|
25
|
+
# BAM_FSECONDARY = 256
|
26
|
+
# BAM_FQCFAIL = 512
|
27
|
+
# BAM_FDUP = 1024
|
28
|
+
# BAM_FSUPPLEMENTARY = 2048
|
29
|
+
|
30
|
+
# TODO: Enabling bitwise operations
|
31
|
+
# hts-nim
|
32
|
+
# proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
|
33
|
+
# proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
|
34
|
+
# proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
|
35
|
+
# proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
|
36
|
+
# proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
|
37
|
+
# proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
|
38
|
+
# proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}
|
39
|
+
|
40
|
+
def paired?
|
41
|
+
has_flag? LibHTS::BAM_FPAIRED
|
42
|
+
end
|
43
|
+
|
44
|
+
def proper_pair?
|
45
|
+
has_flag? LibHTS::BAM_FPROPER_PAIR
|
46
|
+
end
|
47
|
+
|
48
|
+
def unmapped?
|
49
|
+
has_flag? LibHTS::BAM_FUNMAP
|
50
|
+
end
|
51
|
+
|
52
|
+
def mate_unmapped?
|
53
|
+
has_flag? LibHTS::BAM_FMUNMAP
|
54
|
+
end
|
55
|
+
|
56
|
+
def reverse?
|
57
|
+
has_flag? LibHTS::BAM_FREVERSE
|
58
|
+
end
|
59
|
+
|
60
|
+
def mate_reverse?
|
61
|
+
has_flag? LibHTS::BAM_FMREVERSE
|
62
|
+
end
|
63
|
+
|
64
|
+
def read1?
|
65
|
+
has_flag? LibHTS::BAM_FREAD1
|
66
|
+
end
|
67
|
+
|
68
|
+
def read2?
|
69
|
+
has_flag? LibHTS::BAM_FREAD2
|
70
|
+
end
|
71
|
+
|
72
|
+
def secondary?
|
73
|
+
has_flag? LibHTS::BAM_FSECONDARY
|
74
|
+
end
|
75
|
+
|
76
|
+
def qcfail?
|
77
|
+
has_flag? LibHTS::BAM_FQCFAIL
|
78
|
+
end
|
79
|
+
|
80
|
+
def dup?
|
81
|
+
has_flag? LibHTS::BAM_FDUP
|
82
|
+
end
|
83
|
+
|
84
|
+
def supplementary?
|
85
|
+
has_flag? LibHTS::BAM_FSUPPLEMENTARY
|
86
|
+
end
|
87
|
+
|
88
|
+
def has_flag?(m)
|
89
|
+
(@value & m) != 0
|
90
|
+
end
|
91
|
+
|
92
|
+
def to_s
|
93
|
+
"0x#{format('%x', @value)}\t#{@value}\t#{LibHTS.bam_flag2str(@value)}"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/hts/bam/header.rb
CHANGED
@@ -6,21 +6,31 @@
|
|
6
6
|
module HTS
|
7
7
|
class Bam
|
8
8
|
class Header
|
9
|
-
|
9
|
+
def initialize(pointer)
|
10
|
+
@sam_hdr = pointer
|
11
|
+
end
|
12
|
+
|
13
|
+
def struct
|
14
|
+
@sam_hdr
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_ptr
|
18
|
+
@sam_hdr.to_ptr
|
19
|
+
end
|
10
20
|
|
11
|
-
def
|
12
|
-
@
|
21
|
+
def target_count
|
22
|
+
@sam_hdr[:n_targets]
|
13
23
|
end
|
14
24
|
|
15
25
|
# FIXME: better name?
|
16
26
|
def seqs
|
17
|
-
Array.new(@
|
18
|
-
|
27
|
+
Array.new(@sam_hdr[:n_targets]) do |i|
|
28
|
+
LibHTS.sam_hdr_tid2name(@sam_hdr, i)
|
19
29
|
end
|
20
30
|
end
|
21
31
|
|
22
|
-
def
|
23
|
-
|
32
|
+
def to_s
|
33
|
+
LibHTS.sam_hdr_str(@sam_hdr)
|
24
34
|
end
|
25
35
|
end
|
26
36
|
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
module HTS
|
7
|
+
class Bam
|
8
|
+
class Record
|
9
|
+
SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"
|
10
|
+
|
11
|
+
def initialize(bam1_t, header)
|
12
|
+
@bam1 = bam1_t
|
13
|
+
@header = header
|
14
|
+
end
|
15
|
+
|
16
|
+
def struct
|
17
|
+
@bam1
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_ptr
|
21
|
+
@bam1.to_ptr
|
22
|
+
end
|
23
|
+
|
24
|
+
attr_reader :header
|
25
|
+
|
26
|
+
# def initialize_copy
|
27
|
+
# super
|
28
|
+
# end
|
29
|
+
|
30
|
+
def self.rom_sam_str; end
|
31
|
+
|
32
|
+
def tags; end
|
33
|
+
|
34
|
+
# returns the query name.
|
35
|
+
def qname
|
36
|
+
LibHTS.bam_get_qname(@bam1).read_string
|
37
|
+
end
|
38
|
+
|
39
|
+
# Set (query) name.
|
40
|
+
# def qname=(name)
|
41
|
+
# raise 'Not Implemented'
|
42
|
+
# end
|
43
|
+
|
44
|
+
# returns the tid of the record or -1 if not mapped.
|
45
|
+
def tid
|
46
|
+
@bam1[:core][:tid]
|
47
|
+
end
|
48
|
+
|
49
|
+
# returns the tid of the mate or -1 if not mapped.
|
50
|
+
def mate_tid
|
51
|
+
@bam1[:core][:mtid]
|
52
|
+
end
|
53
|
+
|
54
|
+
# returns 0-based start position.
|
55
|
+
def start
|
56
|
+
@bam1[:core][:pos]
|
57
|
+
end
|
58
|
+
|
59
|
+
# returns end position of the read.
|
60
|
+
def stop
|
61
|
+
LibHTS.bam_endpos @bam1
|
62
|
+
end
|
63
|
+
|
64
|
+
# returns 0-based mate position
|
65
|
+
def mate_start
|
66
|
+
@bam1[:core][:mpos]
|
67
|
+
end
|
68
|
+
alias mate_pos mate_start
|
69
|
+
|
70
|
+
# returns the chromosome or '' if not mapped.
|
71
|
+
def chrom
|
72
|
+
return "" if tid == -1
|
73
|
+
|
74
|
+
LibHTS.sam_hdr_tid2name(@header, tid)
|
75
|
+
end
|
76
|
+
|
77
|
+
# returns the chromosome of the mate or '' if not mapped.
|
78
|
+
def mate_chrom
|
79
|
+
mtid = mate_tid
|
80
|
+
return "" if mtid == -1
|
81
|
+
|
82
|
+
LibHTS.sam_hdr_tid2name(@header, mtid)
|
83
|
+
end
|
84
|
+
|
85
|
+
def strand
|
86
|
+
LibHTS.bam_is_rev(@bam1) ? "-" : "+"
|
87
|
+
end
|
88
|
+
|
89
|
+
# def start=(v)
|
90
|
+
# raise 'Not Implemented'
|
91
|
+
# end
|
92
|
+
|
93
|
+
# insert size
|
94
|
+
def isize
|
95
|
+
@bam1[:core][:isize]
|
96
|
+
end
|
97
|
+
|
98
|
+
# mapping quality
|
99
|
+
def mapping_quality
|
100
|
+
@bam1[:core][:qual]
|
101
|
+
end
|
102
|
+
|
103
|
+
# returns a `Cigar` object.
|
104
|
+
def cigar
|
105
|
+
Cigar.new(LibHTS.bam_get_cigar(@bam1), @bam1[:core][:n_cigar])
|
106
|
+
end
|
107
|
+
|
108
|
+
def qlen
|
109
|
+
LibHTS.bam_cigar2qlen(
|
110
|
+
@bam1[:core][:n_cigar],
|
111
|
+
LibHTS.bam_get_cigar(@bam1)
|
112
|
+
)
|
113
|
+
end
|
114
|
+
|
115
|
+
def rlen
|
116
|
+
LibHTS.bam_cigar2rlen(
|
117
|
+
@bam1[:core][:n_cigar],
|
118
|
+
LibHTS.bam_get_cigar(@bam1)
|
119
|
+
)
|
120
|
+
end
|
121
|
+
|
122
|
+
# return the read sequence
|
123
|
+
def sequence
|
124
|
+
r = LibHTS.bam_get_seq(@bam1)
|
125
|
+
seq = String.new
|
126
|
+
(@bam1[:core][:l_qseq]).times do |i|
|
127
|
+
seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
|
128
|
+
end
|
129
|
+
seq
|
130
|
+
end
|
131
|
+
|
132
|
+
# return only the base at the requested index "n" of the query sequence.
|
133
|
+
def base_at(n)
|
134
|
+
n += @bam1[:core][:l_qseq] if n < 0
|
135
|
+
return "." if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
|
136
|
+
|
137
|
+
r = LibHTS.bam_get_seq(@bam1)
|
138
|
+
SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
|
139
|
+
end
|
140
|
+
|
141
|
+
# return the base qualities
|
142
|
+
def base_qualities
|
143
|
+
q_ptr = LibHTS.bam_get_qual(@bam1)
|
144
|
+
q_ptr.read_array_of_uint8(@bam1[:core][:l_qseq])
|
145
|
+
end
|
146
|
+
|
147
|
+
# return only the base quality at the requested index "n" of the query sequence.
|
148
|
+
def base_quality_at(n)
|
149
|
+
n += @bam1[:core][:l_qseq] if n < 0
|
150
|
+
return 0 if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base_quality_at(-1000)
|
151
|
+
|
152
|
+
q_ptr = LibHTS.bam_get_qual(@bam1)
|
153
|
+
q_ptr.get_uint8(n)
|
154
|
+
end
|
155
|
+
|
156
|
+
def flag_str
|
157
|
+
LibHTS.bam_flag2str(@bam1[:core][:flag])
|
158
|
+
end
|
159
|
+
|
160
|
+
# returns a `Flag` object.
|
161
|
+
def flag
|
162
|
+
Flag.new(@bam1[:core][:flag])
|
163
|
+
end
|
164
|
+
|
165
|
+
def tag(str)
|
166
|
+
aux = LibHTS.bam_aux_get(@bam1, str)
|
167
|
+
return nil if aux.null?
|
168
|
+
|
169
|
+
t = aux.read_string(1)
|
170
|
+
|
171
|
+
# A (character), B (general array),
|
172
|
+
# f (real number), H (hexadecimal array),
|
173
|
+
# i (integer), or Z (string).
|
174
|
+
|
175
|
+
case t
|
176
|
+
when "i", "I", "c", "C", "s", "S"
|
177
|
+
LibHTS.bam_aux2i(aux)
|
178
|
+
when "f", "d"
|
179
|
+
LibHTS.bam_aux2f(aux)
|
180
|
+
when "Z", "H"
|
181
|
+
LibHTS.bam_aux2Z(aux)
|
182
|
+
when "A" # char
|
183
|
+
LibHTS.bam_aux2A(aux).chr
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
def to_s
|
188
|
+
kstr = LibHTS::KString.new
|
189
|
+
raise "Failed to format bam record" if LibHTS.sam_format1(@header.struct, @bam1, kstr) == -1
|
190
|
+
|
191
|
+
kstr[:s]
|
192
|
+
end
|
193
|
+
|
194
|
+
# TODO:
|
195
|
+
# def eql?
|
196
|
+
# def hash
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
data/lib/hts/bam.rb
CHANGED
@@ -3,76 +3,111 @@
|
|
3
3
|
# Based on hts-python
|
4
4
|
# https://github.com/quinlan-lab/hts-python
|
5
5
|
|
6
|
-
require_relative
|
7
|
-
require_relative
|
8
|
-
require_relative
|
6
|
+
require_relative "bam/header"
|
7
|
+
require_relative "bam/cigar"
|
8
|
+
require_relative "bam/flag"
|
9
|
+
require_relative "bam/record"
|
9
10
|
|
10
11
|
module HTS
|
11
12
|
class Bam
|
12
13
|
include Enumerable
|
13
|
-
attr_reader :file_path, :mode, :header, :htf
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
attr_reader :file_path, :mode, :header
|
16
|
+
# HtfFile is FFI::BitStruct
|
17
|
+
attr_reader :htf_file
|
18
18
|
|
19
|
-
|
20
|
-
|
19
|
+
class << self
|
20
|
+
alias open new
|
21
|
+
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
@header = Bam::Header.new(FFI.sam_hdr_read(@htf))
|
30
|
-
@b = FFI.bam_init1
|
23
|
+
def initialize(file_path, mode = "r", create_index: nil)
|
24
|
+
file_path = File.expand_path(file_path)
|
25
|
+
|
26
|
+
unless File.exist?(file_path)
|
27
|
+
message = "No such SAM/BAM file - #{file_path}"
|
28
|
+
raise message
|
29
|
+
end
|
31
30
|
|
31
|
+
@file_path = file_path
|
32
|
+
@mode = mode
|
33
|
+
@htf_file = LibHTS.hts_open(file_path, mode)
|
34
|
+
@header = Bam::Header.new(LibHTS.sam_hdr_read(htf_file))
|
35
|
+
|
36
|
+
# read
|
37
|
+
if mode[0] == "r"
|
38
|
+
# load index
|
39
|
+
@idx = LibHTS.sam_index_load(htf_file, file_path)
|
40
|
+
# create index
|
41
|
+
if create_index || (@idx.null? && create_index.nil?)
|
42
|
+
warn "Create index for #{file_path}"
|
43
|
+
LibHTS.sam_index_build(file_path, -1)
|
44
|
+
@idx = LibHTS.sam_index_load(htf_file, file_path)
|
45
|
+
end
|
32
46
|
else
|
33
|
-
# FIXME
|
34
|
-
raise
|
47
|
+
# FIXME: implement
|
48
|
+
raise "not implemented yet."
|
49
|
+
end
|
35
50
|
|
51
|
+
# IO like API
|
52
|
+
if block_given?
|
53
|
+
begin
|
54
|
+
yield self
|
55
|
+
ensure
|
56
|
+
close
|
57
|
+
end
|
36
58
|
end
|
37
59
|
end
|
38
60
|
|
39
|
-
def
|
61
|
+
def struct
|
62
|
+
htf_file
|
63
|
+
end
|
64
|
+
|
65
|
+
def to_ptr
|
66
|
+
htf_file.to_ptr
|
67
|
+
end
|
40
68
|
|
41
69
|
def write(alns)
|
42
70
|
alns.each do
|
43
|
-
|
71
|
+
LibHTS.sam_write1(htf_file, header, alns.b) > 0 || raise
|
44
72
|
end
|
45
73
|
end
|
46
74
|
|
47
75
|
# Close the current file.
|
48
76
|
def close
|
49
|
-
|
77
|
+
LibHTS.hts_close(htf_file)
|
50
78
|
end
|
51
79
|
|
52
80
|
# Flush the current file.
|
53
81
|
def flush
|
54
|
-
|
55
|
-
# FFI.bgzf_flush(@htf.fp.bgzf)
|
82
|
+
# LibHTS.bgzf_flush(@htf_file.fp.bgzf)
|
56
83
|
end
|
57
84
|
|
58
|
-
def each
|
85
|
+
def each
|
59
86
|
# Each does not always start at the beginning of the file.
|
60
87
|
# This is the common behavior of IO objects in Ruby.
|
61
88
|
# This may change in the future.
|
62
|
-
|
89
|
+
return to_enum(__method__) unless block_given?
|
90
|
+
|
91
|
+
while LibHTS.sam_read1(htf_file, header, bam1 = LibHTS.bam_init1) > 0
|
92
|
+
record = Record.new(bam1, header)
|
93
|
+
yield record
|
94
|
+
end
|
95
|
+
self
|
63
96
|
end
|
64
97
|
|
65
98
|
# query [WIP]
|
66
99
|
def query(region)
|
67
|
-
qiter =
|
100
|
+
qiter = LibHTS.sam_itr_querys(@idx, header, region)
|
68
101
|
begin
|
69
|
-
|
102
|
+
bam1 = LibHTS.bam_init1
|
103
|
+
slen = LibHTS.sam_itr_next(htf_file, qiter, bam1)
|
70
104
|
while slen > 0
|
71
|
-
yield
|
72
|
-
|
105
|
+
yield Record.new(bam1, header)
|
106
|
+
bam1 = LibHTS.bam_init1
|
107
|
+
slen = LibHTS.sam_itr_next(htf_file, qiter, bam1)
|
73
108
|
end
|
74
109
|
ensure
|
75
|
-
|
110
|
+
LibHTS.hts_itr_destroy(qiter)
|
76
111
|
end
|
77
112
|
end
|
78
113
|
end
|