htslib 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d4fe8c4a8f710ee3b35793e997f394ba8f80e0fe5b507768de2af2c5ab1297a0
4
- data.tar.gz: 1bb06ced745342f4de8665f046304364c9d419ae05eee355c3cd852d6b2b454a
3
+ metadata.gz: 30f42b474bc317136d665b00781fbfcb11caaf588b091e76bc86bf9cdf8d5e3f
4
+ data.tar.gz: d48e5f74fb0efed4de5af2955b0093d1eb7ef5ee7767bc7ee50bf3e296e7ce28
5
5
  SHA512:
6
- metadata.gz: 8c7fb677d2462a1ddf4cd146d9f9962b570f39762e98d893d77f89bbbf61c92329ed8391030f772bf4f98fb3dc874edc3c365dd2d85266e1e966b24effc1715c
7
- data.tar.gz: fee9ec647e3ef51e83a1aec1c62ac02c796dcfefe4d5fd6c901e4b04e4253619e7337cee651df1ece9ca58c74a7a50620befd5cd8ab823696d0f893a437bd562
6
+ metadata.gz: 6c1bf27a8fdc04a4a9ba678923df5bb579439c286802a5d1f2a4e6f11d7102217eafa0e4e42c2fa853e9ee82c706756315a0a1d6f97c5b5fab58ee909add4eb0
7
+ data.tar.gz: 59219371057e45cf31951eda2dae250acaedd12c593c09fb08f19720818be514797c57e9b45be8e1f6ea60f2fbc79fe6038a1aced6ba66d8f39a544eb6516a0a
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # HTSlib
1
+ # ruby-htslib
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/htslib.svg)](https://badge.fury.io/rb/htslib)
4
4
  ![CI](https://github.com/kojix2/ruby-htslib/workflows/CI/badge.svg)
@@ -10,7 +10,7 @@
10
10
 
11
11
  :apple: Feel free to fork it out if you can develop it!
12
12
 
13
- :bowtie: Just a prototype. Pre-alpha stage.
13
+ :bowtie: Alpha stage.
14
14
 
15
15
  ## Requirements
16
16
 
@@ -24,7 +24,7 @@
24
24
  gem install htslib
25
25
  ```
26
26
 
27
- If you installed htslib with Ubuntu/apt or Mac/homebrew, pkg-config will automatically detect the location of the shared library.
27
+ If you installed htslib with Ubuntu/apt or Mac/homebrew, [pkg-config](https://github.com/ruby-gnome/pkg-config) will automatically detect the location of the shared library.
28
28
 
29
29
  Or you can set the environment variable `HTSLIBDIR`.
30
30
 
@@ -34,33 +34,39 @@ export HTSLIBDIR="/your/path/to/htslib" # libhts.so
34
34
 
35
35
  ## Usage
36
36
 
37
- HTS::FFI - Low-level API
37
+ ### Low level API
38
+
39
+ HTS::LibHTS
38
40
 
39
41
  ```ruby
40
42
  require 'htslib'
41
43
 
42
- a = HTS::FFI.hts_open("a.bam", "r")
43
- b = HTS::FFI.hts_get_format(a)
44
+ a = HTS::LibHTS.hts_open("a.bam", "r")
45
+ b = HTS::LibHTS.hts_get_format(a)
44
46
  p b[:category]
45
47
  p b[:format]
46
48
  ```
47
49
 
48
- A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) is under development.
50
+ Note: Managed structs are not used in ruby-htslib, so you may need to free the memory yourself.
51
+
52
+ ### High level API
53
+
54
+ A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) or [hts-nim](https://github.com/brentp/hts-nim) is under development. The API is subject to change as we improve it.
49
55
 
50
56
  ```ruby
51
57
  require 'htslib'
52
58
 
53
59
  bam = HTS::Bam.new("a.bam")
54
60
 
55
- bam.each do |aln|
56
- p name: aln.qname,
57
- flag: aln.flag,
58
- start: aln.start + 1,
59
- mpos: aln.mate_pos + 1,
60
- mqual: aln.mapping_quality,
61
- seq: aln.sequence,
62
- cigar: aln.cigar.to_s,
63
- qual: aln.base_qualities.map { |i| (i + 33).chr }.join
61
+ bam.each do |r|
62
+ p name: r.qname,
63
+ flag: r.flag,
64
+ start: r.start + 1,
65
+ mpos: r.mate_pos + 1,
66
+ mqual: r.mapping_quality,
67
+ seq: r.sequence,
68
+ cigar: r.cigar.to_s,
69
+ qual: r.base_qualities.map { |i| (i + 33).chr }.join
64
70
  end
65
71
  ```
66
72
 
@@ -80,6 +86,9 @@ bundle exec rake htslib:build
80
86
  bundle exec rake test
81
87
  ```
82
88
 
89
+ [c2ffi](https://github.com/rpav/c2ffi) :
90
+ I am trying to find a way to automatically generate a low-level API using c2ffi.
91
+
83
92
  ## Contributing
84
93
 
85
94
  Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
data/lib/hts/bam.rb CHANGED
@@ -3,76 +3,82 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
- require_relative 'bam/header'
7
- require_relative 'bam/cigar'
8
- require_relative 'bam/alignment'
6
+ require_relative "bam/header"
7
+ require_relative "bam/cigar"
8
+ require_relative "bam/flag"
9
+ require_relative "bam/record"
9
10
 
10
11
  module HTS
11
12
  class Bam
12
13
  include Enumerable
13
- attr_reader :file_path, :mode, :header, :htf
14
+ attr_reader :file_path, :mode, :htf, :header
14
15
 
15
- def initialize(file_path, mode = 'r', create_index: nil, header: nil, fasta: nil)
16
- @file_path = File.expand_path(file_path)
17
- File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
16
+ def initialize(file_path, mode = "r", create_index: nil)
17
+ file_path = File.expand_path(file_path)
18
18
 
19
- @mode = mode
20
- @htf = FFI.hts_open(@file_path, mode)
19
+ raise("No such SAM/BAM file - #{file_path}") unless File.exist?(file_path)
21
20
 
22
- if mode[0] == 'r'
23
- @idx = FFI.sam_index_load(@htf, @file_path)
24
- if (@idx.null? && create_index.nil?) || create_index
25
- FFI.sam_index_build(file_path, -1)
26
- @idx = FFI.sam_index_load(@htf, @file_path)
27
- warn 'NO querying'
28
- end
29
- @header = Bam::Header.new(FFI.sam_hdr_read(@htf))
30
- @b = FFI.bam_init1
21
+ @file_path = file_path
22
+ @mode = mode
23
+ @htf = LibHTS.hts_open(@file_path, mode)
24
+ @header = Bam::Header.new(LibHTS.sam_hdr_read(htf))
25
+ # FIXME: should be defined here?
26
+ @b = LibHTS.bam_init1
31
27
 
28
+ # read
29
+ if mode[0] == "r"
30
+ # load index
31
+ @idx = LibHTS.sam_index_load(htf, file_path)
32
+ # create index
33
+ if create_index || (@idx.null? && create_index.nil?)
34
+ warn "Create index for #{file_path}"
35
+ LibHTS.sam_index_build(file_path, -1)
36
+ @idx = LibHTS.sam_index_load(@htf, @file_path)
37
+ end
32
38
  else
33
- # FIXME
34
- raise 'not implemented yet.'
35
-
39
+ # FIXME: implement
40
+ raise "not implemented yet."
36
41
  end
37
42
  end
38
43
 
39
- def self.header_from_fasta; end
40
-
41
44
  def write(alns)
42
45
  alns.each do
43
- FFI.sam_write1(@htf, @header, alns.b) > 0 || raise
46
+ LibHTS.sam_write1(htf, header, alns.b) > 0 || raise
44
47
  end
45
48
  end
46
49
 
47
50
  # Close the current file.
48
51
  def close
49
- FFI.hts_close(@htf)
52
+ LibHTS.hts_close(htf)
50
53
  end
51
54
 
52
55
  # Flush the current file.
53
56
  def flush
54
57
  raise
55
- # FFI.bgzf_flush(@htf.fp.bgzf)
58
+ # LibHTS.bgzf_flush(@htf.fp.bgzf)
56
59
  end
57
60
 
58
61
  def each(&block)
59
62
  # Each does not always start at the beginning of the file.
60
63
  # This is the common behavior of IO objects in Ruby.
61
64
  # This may change in the future.
62
- block.call(Alignment.new(@b, @header.h)) while FFI.sam_read1(@htf, @header.h, @b) > 0
65
+ while LibHTS.sam_read1(htf, header.h, @b) > 0
66
+ record = Record.new(@b, header.h)
67
+ block.call(record)
68
+ end
63
69
  end
64
70
 
65
71
  # query [WIP]
66
72
  def query(region)
67
- qiter = FFI.sam_itr_querys(@idx, @header.h, region)
73
+ qiter = LibHTS.sam_itr_querys(@idx, header.h, region)
68
74
  begin
69
- slen = FFI.sam_itr_next(@htf, qiter, @b)
75
+ slen = LibHTS.sam_itr_next(htf, qiter, @b)
70
76
  while slen > 0
71
- yield Alignment.new(@b, @header.h)
72
- slen = FFI.sam_itr_next(@htf, qiter, @b)
77
+ yield Record.new(@b, header.h)
78
+ slen = LibHTS.sam_itr_next(htf, qiter, @b)
73
79
  end
74
80
  ensure
75
- FFI.hts_itr_destroy(qiter)
81
+ LibHTS.hts_itr_destroy(qiter)
76
82
  end
77
83
  end
78
84
  end
data/lib/hts/bam/cigar.rb CHANGED
@@ -7,7 +7,7 @@ module HTS
7
7
  class Bam
8
8
  class Cigar
9
9
  include Enumerable
10
- OPS = 'MIDNSHP=XB'
10
+ OPS = "MIDNSHP=XB"
11
11
 
12
12
  def initialize(cigar, n_cigar)
13
13
  @c = cigar
@@ -21,8 +21,8 @@ module HTS
21
21
  def each
22
22
  @n_cigar.times do |i|
23
23
  c = @c[i].read_uint32
24
- yield [FFI.bam_cigar_oplen(c),
25
- FFI.bam_cigar_opchr(c)]
24
+ yield [LibHTS.bam_cigar_oplen(c),
25
+ LibHTS.bam_cigar_opchr(c)]
26
26
  end
27
27
  end
28
28
  end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-nim
4
+ # https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
5
+
6
+ module HTS
7
+ class Bam
8
+ class Flag
9
+ def initialize(flag_value)
10
+ @value = flag_value # type check?
11
+ end
12
+
13
+ attr_accessor :value
14
+
15
+ # BAM_FPAIRED = 1
16
+ # BAM_FPROPER_PAIR = 2
17
+ # BAM_FUNMAP = 4
18
+ # BAM_FMUNMAP = 8
19
+ # BAM_FREVERSE = 16
20
+ # BAM_FMREVERSE = 32
21
+ # BAM_FREAD1 = 64
22
+ # BAM_FREAD2 = 128
23
+ # BAM_FSECONDARY = 256
24
+ # BAM_FQCFAIL = 512
25
+ # BAM_FDUP = 1024
26
+ # BAM_FSUPPLEMENTARY = 2048
27
+
28
+ # TODO: Enable bitwise operations
29
+ # hts-nim
30
+ # proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
31
+ # proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
32
+ # proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
33
+ # proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
34
+ # proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
35
+ # proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
36
+ # proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}
37
+
38
+ def paired?
39
+ has_flag? LibHTS::BAM_FPAIRED
40
+ end
41
+
42
+ def proper_pair?
43
+ has_flag? LibHTS::BAM_FPROPER_PAIR
44
+ end
45
+
46
+ def unmapped?
47
+ has_flag? LibHTS::BAM_FUNMAP
48
+ end
49
+
50
+ def mate_unmapped?
51
+ has_flag? LibHTS::BAM_FMUNMAP
52
+ end
53
+
54
+ def reverse?
55
+ has_flag? LibHTS::BAM_FREVERSE
56
+ end
57
+
58
+ def mate_reverse?
59
+ has_flag? LibHTS::BAM_FMREVERSE
60
+ end
61
+
62
+ def read1?
63
+ has_flag? LibHTS::BAM_FREAD1
64
+ end
65
+
66
+ def read2?
67
+ has_flag? LibHTS::BAM_FREAD2
68
+ end
69
+
70
+ def secondary?
71
+ has_flag? LibHTS::BAM_FSECONDARY
72
+ end
73
+
74
+ def qcfail?
75
+ has_flag? LibHTS::BAM_FQCFAIL
76
+ end
77
+
78
+ def dup?
79
+ has_flag? LibHTS::BAM_FDUP
80
+ end
81
+
82
+ def supplementary?
83
+ has_flag? LibHTS::BAM_FSUPPLEMENTARY
84
+ end
85
+
86
+ def has_flag?(o)
87
+ (@value & o) != 0
88
+ end
89
+ end
90
+ end
91
+ end
@@ -15,12 +15,12 @@ module HTS
15
15
  # FIXME: better name?
16
16
  def seqs
17
17
  Array.new(@h[:n_targets]) do |i|
18
- FFI.sam_hdr_tid2name(@h, i)
18
+ LibHTS.sam_hdr_tid2name(@h, i)
19
19
  end
20
20
  end
21
21
 
22
22
  def text
23
- FFI.sam_hdr_str(@h)
23
+ LibHTS.sam_hdr_str(@h)
24
24
  end
25
25
  end
26
26
  end
@@ -5,7 +5,9 @@
5
5
 
6
6
  module HTS
7
7
  class Bam
8
- class Alignment
8
+ class Record
9
+ SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"
10
+
9
11
  def initialize(bam1_t, bam_hdr_t)
10
12
  @b = bam1_t
11
13
  @h = bam_hdr_t
@@ -19,9 +21,9 @@ module HTS
19
21
 
20
22
  def tags; end
21
23
 
22
- # Read (query) name.
24
+ # returns the query name.
23
25
  def qname
24
- FFI.bam_get_qname(@b).read_string
26
+ LibHTS.bam_get_qname(@b).read_string
25
27
  end
26
28
 
27
29
  # Set (query) name.
@@ -29,12 +31,9 @@ module HTS
29
31
  # raise 'Not Implemented'
30
32
  # end
31
33
 
32
- # returns the chromosome of the mate or '' if not mapped.
33
- def mate_chrom
34
- tid = @b[:core][:mtid]
35
- return '' if tid == -1
36
-
37
- FFI.sam_hdr_tid2name(@h, tid)
34
+ # returns the tid of the record or -1 if not mapped.
35
+ def tid
36
+ @b[:core][:tid]
38
37
  end
39
38
 
40
39
  # returns the tid of the mate or -1 if not mapped.
@@ -42,16 +41,6 @@ module HTS
42
41
  @b[:core][:mtid]
43
42
  end
44
43
 
45
- # returns the tid of the alignment or -1 if not mapped.
46
- def tid
47
- @b[:core][:tid]
48
- end
49
-
50
- # mate position
51
- def mate_pos
52
- @b[:core][:mpos]
53
- end
54
-
55
44
  # returns 0-based start position.
56
45
  def start
57
46
  @b[:core][:pos]
@@ -59,19 +48,33 @@ module HTS
59
48
 
60
49
  # returns end position of the read.
61
50
  def stop
62
- FFI.bam_endpos @b
51
+ LibHTS.bam_endpos @b
52
+ end
53
+
54
+ # returns 0-based mate position
55
+ def mate_start
56
+ @b[:core][:mpos]
63
57
  end
58
+ alias mate_pos mate_start
64
59
 
65
60
  # returns the chromosome or '' if not mapped.
66
61
  def chrom
67
62
  tid = @b[:core][:tid]
68
- return '' if tid == -1
63
+ return "" if tid == -1
64
+
65
+ LibHTS.sam_hdr_tid2name(@h, tid)
66
+ end
67
+
68
+ # returns the chromosome of the mate or '' if not mapped.
69
+ def mate_chrom
70
+ tid = @b[:core][:mtid]
71
+ return "" if tid == -1
69
72
 
70
- FFI.sam_hdr_tid2name(@h, tid)
73
+ LibHTS.sam_hdr_tid2name(@h, tid)
71
74
  end
72
75
 
73
76
  def strand
74
- FFI.bam_is_rev(@b) ? '-' : '+'
77
+ LibHTS.bam_is_rev(@b) ? "-" : "+"
75
78
  end
76
79
 
77
80
  # def start=(v)
@@ -90,61 +93,64 @@ module HTS
90
93
 
91
94
  # returns a `Cigar` object.
92
95
  def cigar
93
- Cigar.new(FFI.bam_get_cigar(@b), @b[:core][:n_cigar])
96
+ Cigar.new(LibHTS.bam_get_cigar(@b), @b[:core][:n_cigar])
94
97
  end
95
98
 
96
99
  def qlen
97
- FFI.bam_cigar2qlen(
100
+ LibHTS.bam_cigar2qlen(
98
101
  @b[:core][:n_cigar],
99
- FFI.bam_get_cigar(@b)
102
+ LibHTS.bam_get_cigar(@b)
100
103
  )
101
104
  end
102
105
 
103
106
  def rlen
104
- FFI.bam_cigar2rlen(
107
+ LibHTS.bam_cigar2rlen(
105
108
  @b[:core][:n_cigar],
106
- FFI.bam_get_cigar(@b)
109
+ LibHTS.bam_get_cigar(@b)
107
110
  )
108
111
  end
109
112
 
110
113
  # return the read sequence
111
114
  def sequence
112
- seq_nt16_str = '=ACMGRSVTWYHKDBN'
113
- r = FFI.bam_get_seq(@b)
114
- Array.new(@b[:core][:l_qseq]) do |i|
115
- seq_nt16_str[FFI.bam_seqi(r, i)]
116
- end.join
115
+ r = LibHTS.bam_get_seq(@b)
116
+ seq = String.new
117
+ (@b[:core][:l_qseq]).times do |i|
118
+ seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
119
+ end
120
+ seq
117
121
  end
118
122
 
123
+ # returns only the base at the requested index "n" of the query sequence.
119
124
  def base_at(n)
120
125
  n += @b[:core][:l_qseq] if n < 0
121
- seq_nt16_str = '=ACMGRSVTWYHKDBN'
122
- return '.' if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
126
+ return "." if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
123
127
 
124
- r = FFI.bam_get_seq(@b)
125
- seq_nt16_str[FFI.bam_seqi(r, n)]
128
+ r = LibHTS.bam_get_seq(@b)
129
+ SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
126
130
  end
127
131
 
132
+ # return the base qualities
128
133
  def base_qualities
129
- q_ptr = FFI.bam_get_qual(@b)
134
+ q_ptr = LibHTS.bam_get_qual(@b)
130
135
  q_ptr.read_array_of_uint8(@b[:core][:l_qseq])
131
136
  end
132
137
 
138
+ # returns only the base quality at the requested index "n" of the query sequence.
133
139
  def base_quality_at(n)
134
- n += @b[:core][:l_qseq] if n < 0 # eg. base_quality_at(-1000)
135
- return 0 if (n >= @b[:core][:l_qseq]) || (n < 0)
140
+ n += @b[:core][:l_qseq] if n < 0
141
+ return 0 if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_quality_at(-1000)
136
142
 
137
- q_ptr = FFI.bam_get_qual(@b)
143
+ q_ptr = LibHTS.bam_get_qual(@b)
138
144
  q_ptr.get_uint8(n)
139
145
  end
140
146
 
141
147
  def flag_str
142
- FFI.bam_flag2str(flag)
148
+ LibHTS.bam_flag2str(@b[:core][:flag])
143
149
  end
144
150
 
145
151
  # returns a `Flag` object.
146
152
  def flag
147
- @b[:core][:flag]
153
+ Flag.new(@b[:core][:flag])
148
154
  end
149
155
 
150
156
  # TODO: