htslib 0.2.3 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "header_record"
4
+
3
5
  module HTS
4
6
  class Bam < Hts
5
7
  # A class for working with alignment header.
6
8
  class Header
7
- def initialize(hts_file)
8
- @sam_hdr = LibHTS.sam_hdr_read(hts_file)
9
# Build a Header from SAM header text.
#
# The length handed to sam_hdr_parse is the byte length of the text,
# not its character count, so header text containing multi-byte
# characters is not cut short.
#
# @param str [String] SAM header text
# @return [Header] header wrapping the value returned by sam_hdr_parse
def self.parse(str)
  new(LibHTS.sam_hdr_parse(str.bytesize, str))
end
12
+
13
# Wrap a SAM header struct.
#
# @param arg [LibHTS::HtsFile, LibHTS::SamHdr, nil]
#   an HtsFile (the header is read with sam_hdr_read), a SamHdr (kept
#   as is), or nil (a new header is obtained from sam_hdr_init)
# @raise [TypeError] for any other kind of argument
def initialize(arg = nil)
  @sam_hdr =
    case arg
    when LibHTS::HtsFile then LibHTS.sam_hdr_read(arg)
    when LibHTS::SamHdr then arg
    when nil then LibHTS.sam_hdr_init
    else raise TypeError, "Invalid argument"
    end
end
10
25
 
11
26
  def struct
@@ -17,6 +32,7 @@ module HTS
17
32
  end
18
33
 
19
34
  def target_count
35
+ # FIXME: use sam_hdr_nref instead of reading the n_targets field directly.
20
36
  @sam_hdr[:n_targets]
21
37
  end
22
38
 
@@ -32,6 +48,41 @@ module HTS
32
48
  end
33
49
  end
34
50
 
51
+ # experimental
52
+ def add_lines(str)
53
+ LibHTS.sam_hdr_add_lines(@sam_hdr, str, 0)
54
+ end
55
+
56
+ # experimental
57
# Add one header line of the given type (experimental).
#
# Every extra argument is forwarded to sam_hdr_add_line as a :string
# variadic value, and the list is closed with a NULL pointer.
#
# @param type [String] header line type
# @param args [Array] values forwarded after the type
def add_line(type, *args)
  typed_args = args.each_with_object([]) { |value, acc| acc.push(:string, value) }
  LibHTS.sam_hdr_add_line(@sam_hdr, type, *typed_args, :pointer, FFI::Pointer::NULL)
end
61
+
62
+ # experimental
63
# Look up a header line by type and key/value pair (experimental).
#
# @param type [String] header line type
# @param key [String] tag to match
# @param value [String] tag value to match
# @return the :s field of the KString filled by sam_hdr_find_line_id
#   when that call returns 0, otherwise nil
def find_line(type, key, value)
  buffer = LibHTS::KString.new
  status = LibHTS.sam_hdr_find_line_id(@sam_hdr, type, key, value, buffer)
  return nil unless status == 0

  buffer[:s]
end
68
+
69
+ # experimental
70
# Look up a header line by type and position (experimental).
#
# @param type [String] header line type
# @param pos [Integer] position passed to sam_hdr_find_line_pos
# @return the :s field of the KString filled by sam_hdr_find_line_pos
#   when that call returns 0, otherwise nil
def find_line_at(type, pos)
  buffer = LibHTS::KString.new
  status = LibHTS.sam_hdr_find_line_pos(@sam_hdr, type, pos, buffer)
  return nil unless status == 0

  buffer[:s]
end
75
+
76
+ # experimental
77
+ def remove_line(type, key, value)
78
+ LibHTS.sam_hdr_remove_line_id(@sam_hdr, type, key, value)
79
+ end
80
+
81
+ # experimental
82
+ def remove_line_at(type, pos)
83
+ LibHTS.sam_hdr_remove_line_pos(@sam_hdr, type, pos)
84
+ end
85
+
35
86
  def to_s
36
87
  LibHTS.sam_hdr_str(@sam_hdr)
37
88
  end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  class Bam < Hts
    # Placeholder class; no behavior is implemented yet.
    class HeaderRecord
      # Start with no underlying record attached. A bare `@bam_hrec`
      # would only be a void expression, so the variable is assigned
      # explicitly.
      def initialize
        @bam_hrec = nil
      end
    end
  end
end
@@ -17,6 +17,7 @@ module HTS
17
17
  @header = header
18
18
  end
19
19
 
20
+ # Return the FFI::Struct object.
20
21
  def struct
21
22
  @bam1
22
23
  end
@@ -25,17 +26,14 @@ module HTS
25
26
  @bam1.to_ptr
26
27
  end
27
28
 
28
- # returns the query name.
29
+ # Get the read name. (a.k.a QNAME)
30
+ # @return [String] query template name
29
31
  def qname
30
32
  LibHTS.bam_get_qname(@bam1).read_string
31
33
  end
32
34
 
33
- # Set (query) name.
34
- # def qname=(name)
35
- # raise 'Not Implemented'
36
- # end
37
-
38
- # returns the tid of the record or -1 if not mapped.
35
+ # Get the chromosome ID of the alignment. -1 if not mapped.
36
+ # @return [Integer] chromosome ID
39
37
  def tid
40
38
  @bam1[:core][:tid]
41
39
  end
@@ -44,7 +42,8 @@ module HTS
44
42
  @bam1[:core][:tid] = tid
45
43
  end
46
44
 
47
- # returns the tid of the mate or -1 if not mapped.
45
+ # Get the chromosome ID of the mate. -1 if not mapped.
46
+ # @return [Integer] chromosome ID
48
47
  def mtid
49
48
  @bam1[:core][:mtid]
50
49
  end
@@ -53,7 +52,8 @@ module HTS
53
52
  @bam1[:core][:mtid] = mtid
54
53
  end
55
54
 
56
- # returns 0-based start position.
55
+ # Get the 0-based leftmost coordinate of the alignment.
56
+ # @return [Integer] 0-based leftmost coordinate
57
57
  def pos
58
58
  @bam1[:core][:pos]
59
59
  end
@@ -62,7 +62,8 @@ module HTS
62
62
  @bam1[:core][:pos] = pos
63
63
  end
64
64
 
65
- # returns 0-based mate position
65
+ # Get the 0-based leftmost coordinate of the mate.
66
+ # @return [Integer] 0-based leftmost coordinate
66
67
  def mate_pos
67
68
  @bam1[:core][:mpos]
68
69
  end
@@ -74,6 +75,8 @@ module HTS
74
75
  alias mpos mate_pos
75
76
  alias mpos= mate_pos=
76
77
 
78
+ # Get the bin calculated by bam_reg2bin().
79
+ # @return [Integer] bin
77
80
  def bin
78
81
  @bam1[:core][:bin]
79
82
  end
@@ -82,12 +85,15 @@ module HTS
82
85
  @bam1[:core][:bin] = bin
83
86
  end
84
87
 
85
- # returns end position of the read.
88
+ # Get the rightmost base position of the alignment on the reference genome.
89
+ # @return [Integer] 0-based coordinate of the first base after the alignment (exclusive end)
86
90
  def endpos
87
91
  LibHTS.bam_endpos @bam1
88
92
  end
89
93
 
90
- # returns the chromosome or '' if not mapped.
94
+ # Get the reference sequence name of the alignment. (a.k.a RNAME)
95
+ # '' if not mapped.
96
+ # @return [String] reference sequence name
91
97
  def chrom
92
98
  return "" if tid == -1
93
99
 
@@ -96,7 +102,9 @@ module HTS
96
102
 
97
103
  alias contig chrom
98
104
 
99
- # returns the chromosome of the mate or '' if not mapped.
105
+ # Get the reference sequence name of the mate.
106
+ # '' if not mapped.
107
+ # @return [String] reference sequence name
100
108
  def mate_chrom
101
109
  return "" if mtid == -1
102
110
 
@@ -105,12 +113,20 @@ module HTS
105
113
 
106
114
  alias mate_contig mate_chrom
107
115
 
108
- # Get strand information.
116
+ # Get the strand of the query: "-" if it is on the reverse strand, "+" otherwise.
117
+ # @return [String] strand "+" or "-"
109
118
  def strand
110
119
  LibHTS.bam_is_rev(@bam1) ? "-" : "+"
111
120
  end
112
121
 
113
- # insert size
122
+ # Get the strand of the query's mate: "-" if it is on the reverse strand, "+" otherwise.
123
+ # @return [String] strand "+" or "-"
124
# Strand of the mate, taken from LibHTS.bam_is_mrev.
# @return [String] "-" when bam_is_mrev is truthy, "+" otherwise
def mate_strand
  if LibHTS.bam_is_mrev(@bam1)
    "-"
  else
    "+"
  end
end
127
+
128
+ # Get the observed template length. (a.k.a TLEN)
129
+ # @return [Integer] isize
114
130
  def insert_size
115
131
  @bam1[:core][:isize]
116
132
  end
@@ -122,7 +138,8 @@ module HTS
122
138
  alias isize insert_size
123
139
  alias isize= insert_size=
124
140
 
125
- # mapping quality
141
+ # Get the mapping quality of the alignment. (a.k.a MAPQ)
142
+ # @return [Integer] mapping quality
126
143
  def mapq
127
144
  @bam1[:core][:qual]
128
145
  end
@@ -131,18 +148,24 @@ module HTS
131
148
  @bam1[:core][:qual] = mapq
132
149
  end
133
150
 
134
- # returns a `Cigar` object.
151
+ # Get the Bam::Cigar object.
152
+ # @return [Bam::Cigar] cigar
135
153
  def cigar
136
- Cigar.new(LibHTS.bam_get_cigar(@bam1), @bam1[:core][:n_cigar])
154
+ Cigar.new(self)
137
155
  end
138
156
 
157
+ # Calculate query length from CIGAR.
158
+ # @return [Integer] query length
139
159
  def qlen
160
+ # cigar.qlen will be slower because it converts to a Ruby array.
140
161
  LibHTS.bam_cigar2qlen(
141
162
  @bam1[:core][:n_cigar],
142
163
  LibHTS.bam_get_cigar(@bam1)
143
164
  )
144
165
  end
145
166
 
167
+ # Calculate reference length from CIGAR.
168
+ # @return [Integer] reference length
146
169
  def rlen
147
170
  LibHTS.bam_cigar2rlen(
148
171
  @bam1[:core][:n_cigar],
@@ -150,7 +173,8 @@ module HTS
150
173
  )
151
174
  end
152
175
 
153
- # return the read sequence
176
+ # Get the sequence. (a.k.a SEQ)
177
+ # @return [String] sequence
154
178
  def seq
155
179
  r = LibHTS.bam_get_seq(@bam1)
156
180
  seq = String.new
@@ -161,11 +185,15 @@ module HTS
161
185
  end
162
186
  alias sequence seq
163
187
 
188
+ # Get the length of the query sequence.
189
+ # @return [Integer] query length
164
190
  def len
165
191
  @bam1[:core][:l_qseq]
166
192
  end
167
193
 
168
- # return only the base of the requested index "i" of the query sequence.
194
+ # Get the base of the requested index "n" of the query sequence.
195
+ # @param [Integer] n index
196
+ # @return [String] base
169
197
  def base(n)
170
198
  n += @bam1[:core][:l_qseq] if n < 0
171
199
  return "." if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base(-1000)
@@ -174,13 +202,23 @@ module HTS
174
202
  SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
175
203
  end
176
204
 
177
- # return the base qualities
205
+ # Get the base qualities.
206
+ # @return [Array<Integer>] base qualities
178
207
  def qual
179
208
  q_ptr = LibHTS.bam_get_qual(@bam1)
180
209
  q_ptr.read_array_of_uint8(@bam1[:core][:l_qseq])
181
210
  end
182
211
 
183
- # return only the base quality of the requested index "i" of the query sequence.
212
+ # Get the base qualities as a string. (a.k.a QUAL)
213
+ # ASCII of base quality + 33.
214
+ # @return [String] base qualities
215
# Base qualities rendered as text: each quality value plus 33,
# converted to its character.
# @return [String] one character per base quality
def qual_string
  ascii_chars = qual.map do |phred|
    (phred + 33).chr
  end
  ascii_chars.join
end
218
+
219
+ # Get the base quality of the requested index "n" of the query sequence.
220
+ # @param [Integer] n index
221
+ # @return [Integer] base quality
184
222
  def base_qual(n)
185
223
  n += @bam1[:core][:l_qseq] if n < 0
186
224
  return 0 if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base_qual(-1000)
@@ -189,7 +227,8 @@ module HTS
189
227
  q_ptr.get_uint8(n)
190
228
  end
191
229
 
192
- # returns a `Flag` object.
230
+ # Get Bam::Flag object of the alignment.
231
+ # @return [Bam::Flag] flag
193
232
  def flag
194
233
  Flag.new(@bam1[:core][:flag])
195
234
  end
@@ -205,7 +244,9 @@ module HTS
205
244
  end
206
245
  end
207
246
 
208
- # retruns the auxillary fields.
247
+ # Get the auxiliary data.
248
+ # @param [String] key tag name
249
+ # @return [String] value
209
250
  def aux(key = nil)
210
251
  aux = Aux.new(self)
211
252
  if key
@@ -230,6 +271,7 @@ module HTS
230
271
  end
231
272
  end
232
273
 
274
+ # @return [String] a string representation of the alignment.
233
275
  def to_s
234
276
  kstr = LibHTS::KString.new
235
277
  raise "Failed to format bam record" if LibHTS.sam_format1(@header.struct, @bam1, kstr) == -1
data/lib/hts/bam.rb CHANGED
@@ -57,10 +57,9 @@ module HTS
57
57
  build_index(index) if build_index
58
58
  @idx = load_index(index)
59
59
  @start_position = tell
60
- super # do nothing
61
60
  end
62
61
 
63
- def build_index(index_name = nil, min_shift: 0)
62
+ def build_index(index_name = nil, min_shift: 0, threads: 2)
64
63
  check_closed
65
64
 
66
65
  if index_name
@@ -68,10 +67,15 @@ module HTS
68
67
  else
69
68
  warn "Create index for #{@file_name}"
70
69
  end
71
- r = LibHTS.sam_index_build3(@file_name, index_name, min_shift, @nthreads)
72
- raise "Failed to build index for #{@file_name}" if r < 0
73
-
74
- self
70
+ case LibHTS.sam_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
71
+ when 0 # successful
72
+ when -1 then raise "indexing failed"
73
+ when -2 then raise "opening #{@file_name} failed"
74
+ when -3 then raise "format not indexable"
75
+ when -4 then raise "failed to create and/or save the index"
76
+ else raise "unknown error"
77
+ end
78
+ self # for method chaining
75
79
  end
76
80
 
77
81
  def load_index(index_name = nil)
@@ -90,7 +94,6 @@ module HTS
90
94
  !@idx.null?
91
95
  end
92
96
 
93
- # Close the current file.
94
97
  def close
95
98
  LibHTS.hts_idx_destroy(@idx) if @idx&.null?
96
99
  @idx = nil
@@ -101,15 +104,23 @@ module HTS
101
104
  check_closed
102
105
 
103
106
  @header = header.dup
104
- LibHTS.hts_set_fai_filename(@hts_file, @file_name)
105
107
  LibHTS.sam_hdr_write(@hts_file, header)
106
108
  end
107
109
 
108
- def write(aln)
110
+ def header=(header)
111
+ write_header(header)
112
+ end
113
+
114
# Write one alignment record with sam_write1.
#
# The record is handed over as is; it is not duplicated first.
#
# @param record the record passed to sam_write1
# @return [nil]
# @raise [RuntimeError] when sam_write1 returns a negative value
def write(record)
  check_closed

  status = LibHTS.sam_write1(@hts_file, header, record)
  return unless status < 0

  raise "Failed to write record"
end
121
+
122
# Append an alignment record by delegating to #write.
#
# Returns self so that appends can be chained (`bam << a << b`);
# #write itself returns nil, which would break such a chain.
#
# @param aln the record handed to #write
# @return [self]
def <<(aln)
  write(aln)
  self
end
114
125
 
115
126
  def each(copy: false, &block)
@@ -120,29 +131,6 @@ module HTS
120
131
  end
121
132
  end
122
133
 
123
- private def each_record_copy
124
- check_closed
125
- return to_enum(__method__) unless block_given?
126
-
127
- while LibHTS.sam_read1(@hts_file, header, bam1 = LibHTS.bam_init1) != -1
128
- record = Record.new(bam1, header)
129
- yield record
130
- end
131
- self
132
- end
133
-
134
- private def each_record_reuse
135
- check_closed
136
- # Each does not always start at the beginning of the file.
137
- # This is the common behavior of IO objects in Ruby.
138
- return to_enum(__method__) unless block_given?
139
-
140
- bam1 = LibHTS.bam_init1
141
- record = Record.new(bam1, header)
142
- yield record while LibHTS.sam_read1(@hts_file, header, bam1) != -1
143
- self
144
- end
145
-
146
134
  def query(region, copy: false, &block)
147
135
  if copy
148
136
  query_copy(region, &block)
@@ -151,45 +139,6 @@ module HTS
151
139
  end
152
140
  end
153
141
 
154
- private def query_copy(region)
155
- check_closed
156
- raise "Index file is required to call the query method." unless index_loaded?
157
- return to_enum(__method__, region) unless block_given?
158
-
159
- qiter = LibHTS.sam_itr_querys(@idx, header, region)
160
-
161
- begin
162
- loop do
163
- bam1 = LibHTS.bam_init1
164
- slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
165
- break if slen == -1
166
- raise if slen < -1
167
-
168
- yield Record.new(bam1, header)
169
- end
170
- ensure
171
- LibHTS.hts_itr_destroy(qiter)
172
- end
173
- self
174
- end
175
-
176
- private def query_reuse(region)
177
- check_closed
178
- raise "Index file is required to call the query method." unless index_loaded?
179
- return to_enum(__method__, region) unless block_given?
180
-
181
- qiter = LibHTS.sam_itr_querys(@idx, header, region)
182
-
183
- bam1 = LibHTS.bam_init1
184
- record = Record.new(bam1, header)
185
- begin
186
- yield record while LibHTS.sam_itr_next(@hts_file, qiter, bam1) > 0
187
- ensure
188
- LibHTS.hts_itr_destroy(qiter)
189
- end
190
- self
191
- end
192
-
193
142
  # @!macro [attach] define_getter
194
143
  # @method $1
195
144
  # Get $1 array
@@ -247,5 +196,71 @@ module HTS
247
196
 
248
197
  self
249
198
  end
199
+
200
+ private
201
+
202
# Iterate over the records in a region, yielding the same Record
# object each time (its underlying bam1 is refilled on every step).
#
# A sam_itr_next result of -1 ends the iteration and a result below
# -1 is reported as an error, the same way query_copy treats those
# values. Any other result, including 0, yields the record.
#
# @param region [String] region passed to sam_itr_querys
# @yield [Record] the reused record
# @return [self], or an Enumerator when no block is given
# @raise [RuntimeError] when no index is loaded, the region cannot be
#   queried, or reading a record fails
def query_reuse(region)
  check_closed
  raise "Index file is required to call the query method." unless index_loaded?
  return to_enum(__method__, region) unless block_given?

  qiter = LibHTS.sam_itr_querys(@idx, header, region)
  raise "Failed to query region: #{region}" if qiter.null?

  bam1 = LibHTS.bam_init1
  record = Record.new(bam1, header)
  begin
    while (slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)) != -1
      raise "Failed to read record while querying region: #{region}" if slen < -1

      yield record
    end
  ensure
    # Release the iterator even when the block or the read raises.
    LibHTS.hts_itr_destroy(qiter)
  end
  self
end
219
+
220
# Iterate over the records in a region, yielding a new Record (backed
# by a fresh bam1 from bam_init1) for every alignment.
#
# A sam_itr_next result of -1 ends the iteration; a result below -1
# raises with a message naming the region (a bare `raise` here would
# only report "unhandled exception").
#
# @param region [String] region passed to sam_itr_querys
# @yield [Record] a new record per alignment
# @return [self], or an Enumerator when no block is given
# @raise [RuntimeError] when no index is loaded, the region cannot be
#   queried, or reading a record fails
def query_copy(region)
  check_closed
  raise "Index file is required to call the query method." unless index_loaded?
  return to_enum(__method__, region) unless block_given?

  qiter = LibHTS.sam_itr_querys(@idx, header, region)
  raise "Failed to query region: #{region}" if qiter.null?

  begin
    loop do
      bam1 = LibHTS.bam_init1
      slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
      break if slen == -1
      raise "Failed to read record while querying region: #{region}" if slen < -1

      yield Record.new(bam1, header)
    end
  ensure
    # Release the iterator even when the block or the read raises.
    LibHTS.hts_itr_destroy(qiter)
  end
  self
end
242
+
243
# Iterate over records from the current file position, yielding the
# same Record object each time (its bam1 is refilled by sam_read1).
#
# Iteration does not necessarily start at the beginning of the file;
# this is the common behavior of IO objects in Ruby.
#
# A sam_read1 result of -1 ends the iteration. A result below -1 is
# raised as an error instead of being treated as another record.
#
# @yield [Record] the reused record
# @return [self], or an Enumerator when no block is given
# @raise [RuntimeError] when sam_read1 returns a value below -1
def each_record_reuse
  check_closed
  return to_enum(__method__) unless block_given?

  bam1 = LibHTS.bam_init1
  record = Record.new(bam1, header)
  while (status = LibHTS.sam_read1(@hts_file, header, bam1)) != -1
    raise "Failed to read record" if status < -1

    yield record
  end
  self
end
254
+
255
# Iterate over records from the current file position, yielding a new
# Record (backed by a fresh bam1 from bam_init1) for every alignment.
#
# A sam_read1 result of -1 ends the iteration. A result below -1 is
# raised as an error instead of being treated as another record.
#
# @yield [Record] a new record per alignment
# @return [self], or an Enumerator when no block is given
# @raise [RuntimeError] when sam_read1 returns a value below -1
def each_record_copy
  check_closed
  return to_enum(__method__) unless block_given?

  while (status = LibHTS.sam_read1(@hts_file, header, bam1 = LibHTS.bam_init1)) != -1
    raise "Failed to read record" if status < -1

    yield Record.new(bam1, header)
  end
  self
end
250
265
  end
251
266
  end
@@ -8,30 +8,10 @@ module HTS
8
8
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
9
  end
10
10
 
11
- # For compatibility with htslib.cr.
12
- def get_int(key)
13
- get(key, :int)
14
- end
15
-
16
- # For compatibility with htslib.cr.
17
- def get_float(key)
18
- get(key, :float)
19
- end
20
-
21
- # For compatibility with htslib.cr.
22
- def get_flag(key)
23
- get(key, :flag)
24
- end
25
-
26
- # For compatibility with htslib.cr.
27
- def get_string(key)
28
- get(key, :string)
29
- end
30
-
31
- def [](key)
32
- get(key)
33
- end
34
-
11
+ # @note: Why is this method named "get" instead of "fetch"?
12
+ # This is for compatibility with the Crystal language
13
+ # which provides methods like `get_int`, `get_float`, etc.
14
+ # I think they are better than `fetch_int` and `fetch_float`.
35
15
  def get(key, type = nil)
36
16
  n = FFI::MemoryPointer.new(:int)
37
17
  p1 = @p1
@@ -73,6 +53,30 @@ module HTS
73
53
  end
74
54
  end
75
55
 
56
+ # For compatibility with HTS.cr.
57
+ def get_int(key)
58
+ get(key, :int)
59
+ end
60
+
61
+ # For compatibility with HTS.cr.
62
+ def get_float(key)
63
+ get(key, :float)
64
+ end
65
+
66
+ # For compatibility with HTS.cr.
67
+ def get_flag(key)
68
+ get(key, :flag)
69
+ end
70
+
71
+ # For compatibility with HTS.cr.
72
+ def get_string(key)
73
+ get(key, :string)
74
+ end
75
+
76
+ def [](key)
77
+ get(key)
78
+ end
79
+
76
80
  def fields
77
81
  ids.map do |id|
78
82
  name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)