htslib 0.2.3 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +65 -47
- data/TUTORIAL.md +270 -0
- data/lib/hts/bam/auxi.rb +28 -2
- data/lib/hts/bam/cigar.rb +46 -6
- data/lib/hts/bam/flag.rb +43 -4
- data/lib/hts/bam/header.rb +53 -2
- data/lib/hts/bam/header_record.rb +11 -0
- data/lib/hts/bam/record.rb +66 -24
- data/lib/hts/bam.rb +88 -73
- data/lib/hts/bcf/format.rb +28 -24
- data/lib/hts/bcf/header.rb +79 -2
- data/lib/hts/bcf/header_record.rb +35 -1
- data/lib/hts/bcf/info.rb +28 -24
- data/lib/hts/bcf.rb +118 -98
- data/lib/hts/faidx/sequence.rb +64 -0
- data/lib/hts/faidx.rb +64 -15
- data/lib/hts/hts.rb +12 -9
- data/lib/hts/libhts/constants.rb +46 -14
- data/lib/hts/libhts/cram.rb +1 -1
- data/lib/hts/libhts/sam.rb +4 -4
- data/lib/hts/libhts/tbx.rb +2 -0
- data/lib/hts/libhts/tbx_funcs.rb +3 -1
- data/lib/hts/libhts/vcf.rb +1 -1
- data/lib/hts/libhts/vcf_funcs.rb +16 -8
- data/lib/hts/tbx.rb +50 -5
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +1 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35551ff5a5cd81937063363c243bc2ac3d12ce09ec69697cd0eedd93945526bd
|
4
|
+
data.tar.gz: ec16e4f3cee66c50a582c4580d6cf266c0f349f8f6bc985864eda3d41b41e412
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f534244d9b6dfd8a741cf21f639205207df612a3107ce563640db7dadd591c140dba92cf6eaa68ae1a440ab9b958697d0a04fac8217ff0ca7dcea796557fe30b
|
7
|
+
data.tar.gz: ac305efbbdd989f8f44e691499a0aa978aa4ed17e24546fd48826a25c2591a8f70a8ec0411456e123248597c518e4c41dc057fbd6738be49b96fc5198ba643cd
|
data/README.md
CHANGED
@@ -8,17 +8,16 @@
|
|
8
8
|
|
9
9
|
Ruby-htslib is the [Ruby](https://www.ruby-lang.org) bindings to [HTSlib](https://github.com/samtools/htslib), a C library for high-throughput sequencing data formats. It allows you to read and write file formats commonly used in genomics, such as [SAM, BAM, VCF, and BCF](http://samtools.github.io/hts-specs/), in the Ruby language.
|
10
10
|
|
11
|
-
:apple: Feel free to fork it
|
12
|
-
|
13
|
-
:bowtie: Alpha stage.
|
11
|
+
:apple: Feel free to fork it!
|
14
12
|
|
15
13
|
## Requirements
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
- [Ruby](https://github.com/ruby/ruby) 3.1 or above.
|
16
|
+
- [HTSlib](https://github.com/samtools/htslib)
|
17
|
+
- Ubuntu : `apt install libhts-dev`
|
18
|
+
- macOS : `brew install htslib`
|
19
|
+
- Windows : [mingw-w64-htslib](https://packages.msys2.org/base/mingw-w64-htslib) is automatically fetched when installing the gem ([RubyInstaller](https://rubyinstaller.org) only).
|
20
|
+
- Build from source code (see Development section)
|
22
21
|
|
23
22
|
## Installation
|
24
23
|
|
@@ -26,7 +25,7 @@ Ruby-htslib is the [Ruby](https://www.ruby-lang.org) bindings to [HTSlib](https:
|
|
26
25
|
gem install htslib
|
27
26
|
```
|
28
27
|
|
29
|
-
If you have installed htslib with apt on Ubuntu or homebrew on Mac, [pkg-config](https://github.com/ruby-gnome/pkg-config)
|
28
|
+
If you have installed htslib with apt on Ubuntu or homebrew on Mac, [pkg-config](https://github.com/ruby-gnome/pkg-config)
|
30
29
|
will automatically detect the location of the shared library. If pkg-config does not work well, set `PKG_CONFIG_PATH`.
|
31
30
|
Alternatively, you can specify the directory of the shared library by setting the environment variable `HTSLIBDIR`.
|
32
31
|
|
@@ -34,11 +33,13 @@ Alternatively, you can specify the directory of the shared library by setting th
|
|
34
33
|
export HTSLIBDIR="/your/path/to/htslib" # libhts.so
|
35
34
|
```
|
36
35
|
|
37
|
-
|
36
|
+
ruby-htslib also works on Windows; if you use RubyInstaller, htslib will be prepared automatically.
|
37
|
+
|
38
|
+
## Usage
|
38
39
|
|
39
|
-
###
|
40
|
+
### HTS::Bam - SAM / BAM / CRAM - Sequence Alignment Map file
|
40
41
|
|
41
|
-
|
42
|
+
Reading fields
|
42
43
|
|
43
44
|
```ruby
|
44
45
|
require 'htslib'
|
@@ -56,14 +57,16 @@ bam.each do |r|
|
|
56
57
|
mpos: r.mpos + 1,
|
57
58
|
isiz: r.isize,
|
58
59
|
seqs: r.seq,
|
59
|
-
qual: r.
|
60
|
+
qual: r.qual_string,
|
60
61
|
MC: r.aux("MC")
|
61
62
|
end
|
62
63
|
|
63
64
|
bam.close
|
64
65
|
```
|
65
66
|
|
66
|
-
|
67
|
+
### HTS::Bcf - VCF / BCF - Variant Call Format file
|
68
|
+
|
69
|
+
Reading fields
|
67
70
|
|
68
71
|
```ruby
|
69
72
|
bcf = HTS::Bcf.open("b.bcf")
|
@@ -83,22 +86,27 @@ end
|
|
83
86
|
bcf.close
|
84
87
|
```
|
85
88
|
|
86
|
-
|
87
|
-
<summary><b>Faidx</b></summary>
|
89
|
+
### HTS::Faidx - FASTA / FASTQ - Nucleic acid sequence
|
88
90
|
|
89
91
|
```ruby
|
90
92
|
fa = HTS::Faidx.open("c.fa")
|
93
|
+
fa.seq("chr1:1-10")
|
94
|
+
```
|
91
95
|
|
92
|
-
|
96
|
+
### HTS::Tbx - Tabix - gff, bed, sam, vcf
|
93
97
|
|
94
|
-
|
98
|
+
```ruby
|
99
|
+
tb = HTS::Tbx.open("test.vcf.gz")
|
100
|
+
tb.query("chr1", 10000, 20000) do |line|
|
101
|
+
p line
|
102
|
+
end
|
95
103
|
```
|
96
104
|
|
97
|
-
|
105
|
+
Note: Faidx or Tbx should not be explicitly closed. See [#27](https://github.com/kojix2/ruby-htslib/issues/27)
|
98
106
|
|
99
|
-
### Low
|
107
|
+
### Low-level API
|
100
108
|
|
101
|
-
`HTS::LibHTS` provides native C functions.
|
109
|
+
`HTS::LibHTS` provides native C functions.
|
102
110
|
|
103
111
|
```ruby
|
104
112
|
require 'htslib'
|
@@ -109,16 +117,23 @@ p b[:category]
|
|
109
117
|
p b[:format]
|
110
118
|
```
|
111
119
|
|
112
|
-
|
120
|
+
Macro functions
|
121
|
+
|
122
|
+
htslib has a lot of macro functions for speed. Ruby-FFI cannot call C macro functions. However, essential functions are reimplemented in Ruby, and you can call them.
|
123
|
+
|
124
|
+
Structs
|
125
|
+
|
126
|
+
Only a small number of C structs are implemented with FFI's ManagedStruct, which frees memory when Ruby's garbage collection fires. Other structs will need to be freed manually.
|
113
127
|
|
114
128
|
### Need more speed?
|
115
129
|
|
116
|
-
Try Crystal. [
|
130
|
+
Try Crystal. [HTS.cr](https://github.com/bio-cr/hts.cr) is implemented in Crystal language and provides an API compatible with ruby-htslib. Crystal language is not as flexible as Ruby language. You can not use `eval` methods and must always be careful with the types. Writing one-time scripts in Crystal may be less fun. However, if you have a clear idea of what you want to do in your mind, have already written code in Ruby, and need to run them over and over, try creating a command line tool in Crystal. The Crystal language is as fast as the Rust and C languages. It will give you incredible power to make tools.
|
117
131
|
|
118
132
|
## Documentation
|
119
133
|
|
120
|
-
|
121
|
-
|
134
|
+
- [TUTORIAL.md](TUTORIAL.md)
|
135
|
+
- [API Documentation (develop branch)](https://kojix2.github.io/ruby-htslib/)
|
136
|
+
- [RubyDoc.info - HTSlib](https://rdoc.info/gems/htslib)
|
122
137
|
|
123
138
|
## Development
|
124
139
|
|
@@ -134,54 +149,57 @@ bundle exec rake test
|
|
134
149
|
|
135
150
|
[GNU Autotools](https://en.wikipedia.org/wiki/GNU_Autotools) is required to compile htslib.
|
136
151
|
|
137
|
-
|
152
|
+
HTSlib has many macro functions. These macro functions cannot be called from FFI and must be reimplemented in Ruby.
|
138
153
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
154
|
+
- Use the new version of Ruby to take full advantage of Ruby's potential.
|
155
|
+
- This is possible because we have a small number of users.
|
156
|
+
- Remain compatible with [HTS.cr](https://github.com/bio-cr/hts.cr).
|
157
|
+
- The most challenging part is the return value. In the Crystal language, methods are expected to return only one type. On the other hand, in the Ruby language, methods that return multiple classes are very common. For example, in the Crystal language, the compiler gets confused if the return value is one of six types: Int32, Int64, Float32, Float64, Nil, or String. In fact Crystal allows you to do that. But the code gets a little messy. In Ruby, this is very common and doesn't cause any problems.
|
158
|
+
- Ruby and Crystal are languages that use garbage collection. However, the memory release policy for allocated C structures is slightly different: in Ruby-FFI, you can define a `self.release` method in `FFI::Struct`. This method is called when the object is garbage-collected. So you don't have to worry about memory in high-level APIs like Bam::Record or Bcf::Record, etc. Crystal requires you to define a finalize method on each class. So you need to define it in Bam::Record or Bcf::Record.
|
143
159
|
|
144
|
-
|
160
|
+
Method naming generally follows the Rust-htslib API.
|
145
161
|
|
146
162
|
#### FFI Extensions
|
147
163
|
|
148
|
-
|
164
|
+
- [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) : Extension of Ruby-FFI to support bitfields.
|
149
165
|
|
150
|
-
#### Automatic
|
166
|
+
#### Automatic validation
|
151
167
|
|
152
|
-
|
168
|
+
In the `script` directory, there are several tools to help implement ruby-htslib. Scripts using c2ffi can check the coverage of htslib functions in Ruby-htslib. They are useful when new versions of htslib are released.
|
153
169
|
|
154
|
-
|
170
|
+
- [c2ffi](https://github.com/rpav/c2ffi) is a tool to create JSON format metadata from C header files.
|
155
171
|
|
156
|
-
|
172
|
+
## Contributing
|
157
173
|
|
158
|
-
|
159
|
-
* Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
160
|
-
* Write, clarify, or fix documentation
|
161
|
-
* Suggest or add new features
|
162
|
-
* [financial contributions](https://github.com/sponsors/kojix2)
|
174
|
+
Ruby-htslib is a library under development, so even minor improvements like typo fixes are welcome! Please feel free to send us your pull requests.
|
163
175
|
|
176
|
+
- [Report bugs](https://github.com/kojix2/ruby-htslib/issues)
|
177
|
+
- Fix bugs and [submit pull requests](https://github.com/kojix2/ruby-htslib/pulls)
|
178
|
+
- Write, clarify, or fix documentation
|
179
|
+
- Suggest or add new features
|
180
|
+
- [financial contributions](https://github.com/sponsors/kojix2)
|
164
181
|
|
165
|
-
```
|
182
|
+
```markdown
|
166
183
|
# Ownership and Commitment Rights
|
167
184
|
|
168
|
-
Do you need commit rights to ruby-htslib repository?
|
185
|
+
Do you need commit rights to the ruby-htslib repository?
|
169
186
|
Do you want to get admin rights and take over the project?
|
170
187
|
If so, please feel free to contact us @kojix2.
|
171
188
|
```
|
172
189
|
|
173
|
-
#### Why do you implement htslib in a language like Ruby, which is not widely used in
|
190
|
+
#### Why do you implement htslib in a language like Ruby, which is not widely used in bioinformatics?
|
174
191
|
|
175
|
-
One of the greatest joys of using a minor language like Ruby in bioinformatics is that
|
192
|
+
One of the greatest joys of using a minor language like Ruby in bioinformatics is that nothing stops you from reinventing the wheel. Reinventing the wheel can be fun. But with languages like Python and R, where many bioinformatics masters work, there is no chance for beginners to create htslib bindings. Bioinformatics file formats, libraries, and tools are very complex, and I need to learn how to understand them. So I started to implement the HTSLib binding myself to better understand how the pioneers of bioinformatics felt when establishing the file format and how they created their tools. I hope one day we can work on bioinformatics using Ruby and Crystal languages, not to replace other languages such as Python and R, but to add new power and value to this advancing field.
|
176
193
|
|
177
194
|
## Links
|
178
195
|
|
179
|
-
|
180
|
-
|
196
|
+
- [samtools/hts-specs](https://github.com/samtools/hts-specs)
|
197
|
+
- [bioruby](https://github.com/bioruby/bioruby)
|
181
198
|
|
182
199
|
## Funding support
|
183
200
|
|
184
201
|
This work was supported partially by [Ruby Association Grant 2020](https://www.ruby.or.jp/en/news/20201022).
|
202
|
+
|
185
203
|
## License
|
186
204
|
|
187
205
|
[MIT License](https://opensource.org/licenses/MIT).
|
data/TUTORIAL.md
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
# Tutorial
|
2
|
+
|
3
|
+
```mermaid
|
4
|
+
%%{init:{'theme':'base'}}%%
|
5
|
+
classDiagram
|
6
|
+
Bam~Hts~ o-- `Bam::Header`
|
7
|
+
Bam o-- `Bam::Record`
|
8
|
+
`Bam::Record` o-- `Bam::Header`
|
9
|
+
Bcf~Hts~ o-- `Bcf::Header`
|
10
|
+
Bcf o-- `Bcf::Record`
|
11
|
+
`Bcf::Record` o-- `Bcf::Header`
|
12
|
+
`Bam::Header` o-- `Bam::HeaderRecord`
|
13
|
+
`Bcf::Header` o-- `Bcf::HeaderRecord`
|
14
|
+
`Bam::Record` o-- Flag
|
15
|
+
`Bam::Record` o-- Cigar
|
16
|
+
`Bam::Record` o-- Aux
|
17
|
+
`Bcf::Record` o-- Info
|
18
|
+
`Bcf::Record` o-- Format
|
19
|
+
class Bam{
|
20
|
+
+@hts_file : FFI::Struct
|
21
|
+
+@header : Bam::Header
|
22
|
+
+@file_name
|
23
|
+
+@index_name
|
24
|
+
+@mode
|
25
|
+
+struct()
|
26
|
+
+build_index()
|
27
|
+
+each() Enumerable
|
28
|
+
+query()
|
29
|
+
}
|
30
|
+
class Bcf{
|
31
|
+
+@hts_file : FFI::Struct
|
32
|
+
+@header : Bcf::Header
|
33
|
+
+@file_name
|
34
|
+
+@index_name
|
35
|
+
+@mode
|
36
|
+
+struct()
|
37
|
+
+build_index()
|
38
|
+
+each() Enumerable
|
39
|
+
+query()
|
40
|
+
}
|
41
|
+
class Tbx~Hts~{
|
42
|
+
+@hts_file : FFI::Struct
|
43
|
+
}
|
44
|
+
class `Bam::Header`{
|
45
|
+
+@sam_hdr : FFI::Struct
|
46
|
+
+struct()
|
47
|
+
+target_count()
|
48
|
+
+target_names()
|
49
|
+
+to_s()
|
50
|
+
}
|
51
|
+
class `Bam::Record` {
|
52
|
+
+@bam1 : FFI::Struct
|
53
|
+
+@header : Bam::Header
|
54
|
+
+struct()
|
55
|
+
+tid()
|
56
|
+
+tid=()
|
57
|
+
+mtid()
|
58
|
+
+mtid=()
|
59
|
+
+pos()
|
60
|
+
+pos=()
|
61
|
+
+mpos()
|
62
|
+
+mpos=()
|
63
|
+
+bin()
|
64
|
+
+bin=()
|
65
|
+
+qname()
|
66
|
+
+flag()
|
67
|
+
+chrom()
|
68
|
+
+mapq()
|
69
|
+
+cigar()
|
70
|
+
+mate_chrom()
|
71
|
+
+isize()
|
72
|
+
+seq()
|
73
|
+
+qual()
|
74
|
+
+qual_string()
|
75
|
+
+aux()
|
76
|
+
+to_s()
|
77
|
+
}
|
78
|
+
class `Aux` {
|
79
|
+
-@record : Bam::Record
|
80
|
+
+[]()
|
81
|
+
+get_int()
|
82
|
+
+get_float()
|
83
|
+
+get_string()
|
84
|
+
}
|
85
|
+
class `Bcf::Header`{
|
86
|
+
+@bcf_hdr : FFI::Struct
|
87
|
+
+struct()
|
88
|
+
+to_s()
|
89
|
+
}
|
90
|
+
class `Bcf::Record`{
|
91
|
+
+@bcf1 : FFI::Struct
|
92
|
+
+@header : Bcf::Header
|
93
|
+
+struct()
|
94
|
+
+chrom()
|
95
|
+
+pos()
|
96
|
+
+id()
|
97
|
+
+qual()
|
98
|
+
+ref()
|
99
|
+
+alt()
|
100
|
+
+filter()
|
101
|
+
+info()
|
102
|
+
+format()
|
103
|
+
+to_s()
|
104
|
+
}
|
105
|
+
class Flag {
|
106
|
+
+@value : Integer
|
107
|
+
+paired?()
|
108
|
+
+proper_pair?()
|
109
|
+
+unmapped?()
|
110
|
+
+mate_unmapped?()
|
111
|
+
+reverse?()
|
112
|
+
+mate_reverse?()
|
113
|
+
+read1?()
|
114
|
+
+read2?()
|
115
|
+
+secondary?()
|
116
|
+
+qcfail?()
|
117
|
+
+duplicate?()
|
118
|
+
+supplementary?()
|
119
|
+
+&()
|
120
|
+
+|()
|
121
|
+
+^()
|
122
|
+
+~()
|
123
|
+
+<<()
|
124
|
+
+>>()
|
125
|
+
+to_i()
|
126
|
+
+to_s()
|
127
|
+
}
|
128
|
+
class Info {
|
129
|
+
-@record : Bcf::Record
|
130
|
+
+[]()
|
131
|
+
+get_int()
|
132
|
+
+get_float()
|
133
|
+
+get_string()
|
134
|
+
+get_flag()
|
135
|
+
+fields()
|
136
|
+
+length() +size()
|
137
|
+
+to_h()
|
138
|
+
-info_ptr()
|
139
|
+
}
|
140
|
+
class Format {
|
141
|
+
-@record : Bcf::Record
|
142
|
+
+[]()
|
143
|
+
+get_int()
|
144
|
+
+get_float()
|
145
|
+
+get_string()
|
146
|
+
+get_flag()
|
147
|
+
+fields()
|
148
|
+
+length() +size()
|
149
|
+
+to_h()
|
150
|
+
-format_ptr()
|
151
|
+
}
|
152
|
+
class Cigar {
|
153
|
+
-@array : Array
|
154
|
+
+each() Enumerable
|
155
|
+
+qlen()
|
156
|
+
+rlen()
|
157
|
+
+to_s()
|
158
|
+
+==()
|
159
|
+
+eql?()
|
160
|
+
}
|
161
|
+
class Faidx{
|
162
|
+
+@fai
|
163
|
+
}
|
164
|
+
|
165
|
+
```
|
166
|
+
|
167
|
+
## HTS::Bam - SAM / BAM / CRAM - Sequence Alignment Map file
|
168
|
+
|
169
|
+
Reading fields
|
170
|
+
|
171
|
+
```ruby
|
172
|
+
require 'htslib'
|
173
|
+
|
174
|
+
bam = HTS::Bam.open("test/fixtures/moo.bam")
|
175
|
+
|
176
|
+
bam.each do |r|
|
177
|
+
pp name: r.qname,
|
178
|
+
flag: r.flag,
|
179
|
+
chrm: r.chrom,
|
180
|
+
strt: r.pos + 1,
|
181
|
+
mapq: r.mapq,
|
182
|
+
cigr: r.cigar.to_s,
|
183
|
+
mchr: r.mate_chrom,
|
184
|
+
mpos: r.mpos + 1,
|
185
|
+
isiz: r.isize,
|
186
|
+
seqs: r.seq,
|
187
|
+
qual: r.qual_string,
|
188
|
+
MC: r.aux("MC")
|
189
|
+
end
|
190
|
+
|
191
|
+
bam.close
|
192
|
+
```
|
193
|
+
|
194
|
+
Open with block
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
HTS::Bam.open("test/fixtures/moo.bam") do |b|
|
198
|
+
b.
|
199
|
+
do |r|
|
200
|
+
# ...
|
201
|
+
end
|
202
|
+
end
|
203
|
+
```
|
204
|
+
|
205
|
+
Writing
|
206
|
+
|
207
|
+
```ruby
|
208
|
+
input = HTS::Bam.open("foo.bam")
|
209
|
+
out = HTS::Bam.open("bar.bam", "wb")
|
210
|
+
|
211
|
+
out.header = input.header
|
212
|
+
# out.write_header(input.header)
|
213
|
+
|
214
|
+
input.each do |r|
|
215
|
+
out << r
|
216
|
+
# out.write(r)
|
217
|
+
end
|
218
|
+
|
219
|
+
input.close
|
220
|
+
out.close
|
221
|
+
```
|
222
|
+
|
223
|
+
## HTS::Bcf - VCF / BCF - Variant Call Format file
|
224
|
+
|
225
|
+
Reading fields
|
226
|
+
|
227
|
+
```ruby
|
228
|
+
bcf = HTS::Bcf.open("b.bcf")
|
229
|
+
|
230
|
+
bcf.each do |r|
|
231
|
+
p chrom: r.chrom,
|
232
|
+
pos: r.pos,
|
233
|
+
id: r.id,
|
234
|
+
qual: r.qual.round(2),
|
235
|
+
ref: r.ref,
|
236
|
+
alt: r.alt,
|
237
|
+
filter: r.filter,
|
238
|
+
info: r.info.to_h,
|
239
|
+
format: r.format.to_h
|
240
|
+
end
|
241
|
+
|
242
|
+
bcf.close
|
243
|
+
```
|
244
|
+
|
245
|
+
Open with block
|
246
|
+
|
247
|
+
```ruby
|
248
|
+
HTS::Bcf.open("b.bcf") do |b|
|
249
|
+
b.each do |r|
|
250
|
+
# ...
|
251
|
+
end
|
252
|
+
end
|
253
|
+
```
|
254
|
+
|
255
|
+
Writing
|
256
|
+
|
257
|
+
```ruby
|
258
|
+
input = HTS::Bcf.open("foo.bcf")
|
259
|
+
out = HTS::Bcf.open("bar.bcf", "wb")
|
260
|
+
|
261
|
+
out.header = input.header
|
262
|
+
# out.write_header(input.header)
|
263
|
+
input.each do |r|
|
264
|
+
out << r
|
265
|
+
# out.write(r)
|
266
|
+
end
|
267
|
+
|
268
|
+
input.close
|
269
|
+
out.close
|
270
|
+
```
|
data/lib/hts/bam/auxi.rb
CHANGED
@@ -4,21 +4,32 @@
|
|
4
4
|
#
|
5
5
|
# A. This is for compatibility with Windows.
|
6
6
|
# In Windows, aux is a reserved word
|
7
|
-
# You cannot create a file named aux.
|
7
|
+
# You cannot create a file named aux. Eww!
|
8
8
|
|
9
9
|
module HTS
|
10
10
|
class Bam < Hts
|
11
11
|
# Auxiliary record data
|
12
|
+
#
|
13
|
+
# @note Aux is a View object.
|
14
|
+
# The result of the alignment is assigned to the bam1 structure.
|
15
|
+
# Ruby's Aux class references a part of it. There is no one-to-one
|
16
|
+
# correspondence between C structures and Ruby's Aux class.
|
12
17
|
class Aux
|
18
|
+
attr_reader :record
|
19
|
+
|
13
20
|
def initialize(record)
|
14
21
|
@record = record
|
15
22
|
end
|
16
23
|
|
24
|
+
# @note Why is this method named "get" instead of "fetch"?
|
25
|
+
# This is for compatibility with the Crystal language
|
26
|
+
# which provides methods like `get_int`, `get_float`, etc.
|
27
|
+
# I think they are better than `fetch_int` and `fetch_float`.
|
17
28
|
def get(key, type = nil)
|
18
29
|
aux = LibHTS.bam_aux_get(@record.struct, key)
|
19
30
|
return nil if aux.null?
|
20
31
|
|
21
|
-
type
|
32
|
+
type = type ? type.to_s : aux.read_string(1)
|
22
33
|
|
23
34
|
# A (character), B (general array),
|
24
35
|
# f (real number), H (hexadecimal array),
|
@@ -38,6 +49,21 @@ module HTS
|
|
38
49
|
end
|
39
50
|
end
|
40
51
|
|
52
|
+
# For compatibility with HTS.cr.
|
53
|
+
def get_int(key)
|
54
|
+
get(key, "i")
|
55
|
+
end
|
56
|
+
|
57
|
+
# For compatibility with HTS.cr.
|
58
|
+
def get_float(key)
|
59
|
+
get(key, "f")
|
60
|
+
end
|
61
|
+
|
62
|
+
# For compatibility with HTS.cr.
|
63
|
+
def get_string(key)
|
64
|
+
get(key, "Z")
|
65
|
+
end
|
66
|
+
|
41
67
|
def [](key)
|
42
68
|
get(key)
|
43
69
|
end
|
data/lib/hts/bam/cigar.rb
CHANGED
@@ -6,11 +6,31 @@ module HTS
|
|
6
6
|
class Cigar
|
7
7
|
include Enumerable
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
# a uint32_t array (with 32 bits for every CIGAR op: length<<4|operation)
|
10
|
+
attr_accessor :array
|
11
|
+
|
12
|
+
# Create a new Cigar object from a string.
|
13
|
+
# @param [String] str
|
14
|
+
# The CIGAR string is converted to a uint32_t array in htslib.
|
15
|
+
def self.parse(str)
|
16
|
+
c = FFI::MemoryPointer.new(:pointer)
|
17
|
+
m = FFI::MemoryPointer.new(:size_t)
|
18
|
+
LibHTS.sam_parse_cigar(str, FFI::Pointer::NULL, c, m)
|
19
|
+
cigar_array = c.read_pointer.read_array_of_uint32(m.read(:size_t))
|
20
|
+
obj = new
|
21
|
+
obj.array = cigar_array
|
22
|
+
obj
|
23
|
+
end
|
24
|
+
|
25
|
+
def initialize(record = nil)
|
26
|
+
if record
|
27
|
+
# The record is used at initialization and is not retained after that.
|
28
|
+
bam1 = record.struct
|
29
|
+
n_cigar = bam1[:core][:n_cigar]
|
30
|
+
@array = LibHTS.bam_get_cigar(bam1).read_array_of_uint32(n_cigar)
|
31
|
+
else
|
32
|
+
@array = []
|
33
|
+
end
|
14
34
|
end
|
15
35
|
|
16
36
|
def to_s
|
@@ -20,12 +40,32 @@ module HTS
|
|
20
40
|
def each
|
21
41
|
return to_enum(__method__) unless block_given?
|
22
42
|
|
23
|
-
@
|
43
|
+
@array.each do |c|
|
24
44
|
op = LibHTS.bam_cigar_opchr(c)
|
25
45
|
len = LibHTS.bam_cigar_oplen(c)
|
26
46
|
yield [op, len]
|
27
47
|
end
|
28
48
|
end
|
49
|
+
|
50
|
+
def qlen
|
51
|
+
a = FFI::MemoryPointer.new(:uint32, @array.size)
|
52
|
+
a.write_array_of_uint32(@array)
|
53
|
+
LibHTS.bam_cigar2qlen(@array.size, a)
|
54
|
+
end
|
55
|
+
|
56
|
+
def rlen
|
57
|
+
a = FFI::MemoryPointer.new(:uint32, @array.size)
|
58
|
+
a.write_array_of_uint32(@array)
|
59
|
+
LibHTS.bam_cigar2rlen(@array.size, a)
|
60
|
+
end
|
61
|
+
|
62
|
+
def ==(other)
|
63
|
+
other.is_a?(Cigar) && (@array == other.array)
|
64
|
+
end
|
65
|
+
|
66
|
+
def eql?(other)
|
67
|
+
other.is_a?(Cigar) && @array.eql?(other.array)
|
68
|
+
end
|
29
69
|
end
|
30
70
|
end
|
31
71
|
end
|
data/lib/hts/bam/flag.rb
CHANGED
@@ -28,8 +28,6 @@ module HTS
|
|
28
28
|
# BAM_FDUP = 1024
|
29
29
|
# BAM_FSUPPLEMENTARY = 2048
|
30
30
|
|
31
|
-
# TODO: Enabling bitwise operations?
|
32
|
-
|
33
31
|
TABLE = { paired?: LibHTS::BAM_FPAIRED,
|
34
32
|
proper_pair?: LibHTS::BAM_FPROPER_PAIR,
|
35
33
|
unmapped?: LibHTS::BAM_FUNMAP,
|
@@ -43,16 +41,57 @@ module HTS
|
|
43
41
|
duplicate?: LibHTS::BAM_FDUP,
|
44
42
|
supplementary?: LibHTS::BAM_FSUPPLEMENTARY }.freeze
|
45
43
|
|
46
|
-
|
44
|
+
# @!macro [attach] generate_flag_methods
|
45
|
+
# @!method $1
|
46
|
+
# @return [Boolean]
|
47
|
+
def self.generate(name)
|
47
48
|
define_method(name) do
|
48
|
-
|
49
|
+
(@value & TABLE[name]) != 0
|
49
50
|
end
|
50
51
|
end
|
52
|
+
private_class_method :generate
|
53
|
+
|
54
|
+
generate :paired?
|
55
|
+
generate :proper_pair?
|
56
|
+
generate :unmapped?
|
57
|
+
generate :mate_unmapped?
|
58
|
+
generate :reverse?
|
59
|
+
generate :mate_reverse?
|
60
|
+
generate :read1?
|
61
|
+
generate :read2?
|
62
|
+
generate :secondary?
|
63
|
+
generate :qcfail?
|
64
|
+
generate :duplicate?
|
65
|
+
generate :supplementary?
|
51
66
|
|
52
67
|
def has_flag?(f)
|
53
68
|
(@value & f) != 0
|
54
69
|
end
|
55
70
|
|
71
|
+
def &(other)
|
72
|
+
Flag.new(@value & other.to_i)
|
73
|
+
end
|
74
|
+
|
75
|
+
def |(other)
|
76
|
+
Flag.new(@value | other.to_i)
|
77
|
+
end
|
78
|
+
|
79
|
+
def ^(other)
|
80
|
+
Flag.new(@value ^ other.to_i)
|
81
|
+
end
|
82
|
+
|
83
|
+
def ~
|
84
|
+
Flag.new(~@value)
|
85
|
+
end
|
86
|
+
|
87
|
+
def <<(f)
|
88
|
+
Flag.new(@value << f.to_i)
|
89
|
+
end
|
90
|
+
|
91
|
+
def >>(other)
|
92
|
+
Flag.new(@value >> other.to_i)
|
93
|
+
end
|
94
|
+
|
56
95
|
def to_i
|
57
96
|
@value
|
58
97
|
end
|