htslib 0.2.3 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "header_record"
4
+
3
5
  module HTS
4
6
  class Bam < Hts
5
7
  # A class for working with alignment header.
6
8
  class Header
7
- def initialize(hts_file)
8
- @sam_hdr = LibHTS.sam_hdr_read(hts_file)
9
+ def self.parse(str)
10
+ new(LibHTS.sam_hdr_parse(str.size, str))
11
+ end
12
+
13
+ def initialize(arg = nil)
14
+ case arg
15
+ when LibHTS::HtsFile
16
+ @sam_hdr = LibHTS.sam_hdr_read(arg)
17
+ when LibHTS::SamHdr
18
+ @sam_hdr = arg
19
+ when nil
20
+ @sam_hdr = LibHTS.sam_hdr_init
21
+ else
22
+ raise TypeError, "Invalid argument"
23
+ end
9
24
  end
10
25
 
11
26
  def struct
@@ -17,6 +32,7 @@ module HTS
17
32
  end
18
33
 
19
34
  def target_count
35
+ # FIXME: sam_hdr_nref
20
36
  @sam_hdr[:n_targets]
21
37
  end
22
38
 
@@ -32,6 +48,41 @@ module HTS
32
48
  end
33
49
  end
34
50
 
51
+ # experimental
52
+ def add_lines(str)
53
+ LibHTS.sam_hdr_add_lines(@sam_hdr, str, 0)
54
+ end
55
+
56
+ # experimental
57
+ def add_line(type, *args)
58
+ args = args.flat_map { |arg| [:string, arg] }
59
+ LibHTS.sam_hdr_add_line(@sam_hdr, type, *args, :pointer, FFI::Pointer::NULL)
60
+ end
61
+
62
+ # experimental
63
+ def find_line(type, key, value)
64
+ ks = LibHTS::KString.new
65
+ r = LibHTS.sam_hdr_find_line_id(@sam_hdr, type, key, value, ks)
66
+ r == 0 ? ks[:s] : nil
67
+ end
68
+
69
+ # experimental
70
+ def find_line_at(type, pos)
71
+ ks = LibHTS::KString.new
72
+ r = LibHTS.sam_hdr_find_line_pos(@sam_hdr, type, pos, ks)
73
+ r == 0 ? ks[:s] : nil
74
+ end
75
+
76
+ # experimental
77
+ def remove_line(type, key, value)
78
+ LibHTS.sam_hdr_remove_line_id(@sam_hdr, type, key, value)
79
+ end
80
+
81
+ # experimental
82
+ def remove_line_at(type, pos)
83
+ LibHTS.sam_hdr_remove_line_pos(@sam_hdr, type, pos)
84
+ end
85
+
35
86
  def to_s
36
87
  LibHTS.sam_hdr_str(@sam_hdr)
37
88
  end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HTS
4
+ class Bam < Hts
5
+ class HeaderRecord
6
+ def initialize
7
+ @bam_hrec
8
+ end
9
+ end
10
+ end
11
+ end
@@ -17,6 +17,7 @@ module HTS
17
17
  @header = header
18
18
  end
19
19
 
20
+ # Return the FFI::Struct object.
20
21
  def struct
21
22
  @bam1
22
23
  end
@@ -25,17 +26,14 @@ module HTS
25
26
  @bam1.to_ptr
26
27
  end
27
28
 
28
- # returns the query name.
29
+ # Get the read name. (a.k.a QNAME)
30
+ # @return [String] query template name
29
31
  def qname
30
32
  LibHTS.bam_get_qname(@bam1).read_string
31
33
  end
32
34
 
33
- # Set (query) name.
34
- # def qname=(name)
35
- # raise 'Not Implemented'
36
- # end
37
-
38
- # returns the tid of the record or -1 if not mapped.
35
+ # Get the chromosome ID of the alignment. -1 if not mapped.
36
+ # @return [Integer] chromosome ID
39
37
  def tid
40
38
  @bam1[:core][:tid]
41
39
  end
@@ -44,7 +42,8 @@ module HTS
44
42
  @bam1[:core][:tid] = tid
45
43
  end
46
44
 
47
- # returns the tid of the mate or -1 if not mapped.
45
+ # Get the chromosome ID of the mate. -1 if not mapped.
46
+ # @return [Integer] chromosome ID
48
47
  def mtid
49
48
  @bam1[:core][:mtid]
50
49
  end
@@ -53,7 +52,8 @@ module HTS
53
52
  @bam1[:core][:mtid] = mtid
54
53
  end
55
54
 
56
- # returns 0-based start position.
55
+ # Get the 0-based leftmost coordinate of the alignment.
56
+ # @return [Integer] 0-based leftmost coordinate
57
57
  def pos
58
58
  @bam1[:core][:pos]
59
59
  end
@@ -62,7 +62,8 @@ module HTS
62
62
  @bam1[:core][:pos] = pos
63
63
  end
64
64
 
65
- # returns 0-based mate position
65
+ # Get the 0-based leftmost coordinate of the mate.
66
+ # @return [Integer] 0-based leftmost coordinate
66
67
  def mate_pos
67
68
  @bam1[:core][:mpos]
68
69
  end
@@ -74,6 +75,8 @@ module HTS
74
75
  alias mpos mate_pos
75
76
  alias mpos= mate_pos=
76
77
 
78
+ # Get the bin calculated by bam_reg2bin().
79
+ # @return [Integer] bin
77
80
  def bin
78
81
  @bam1[:core][:bin]
79
82
  end
@@ -82,12 +85,15 @@ module HTS
82
85
  @bam1[:core][:bin] = bin
83
86
  end
84
87
 
85
- # returns end position of the read.
88
+ # Get the rightmost base position of the alignment on the reference genome.
89
+ # @return [Integer] 0-based coordinate of the first base after the alignment (exclusive end)
86
90
  def endpos
87
91
  LibHTS.bam_endpos @bam1
88
92
  end
89
93
 
90
- # returns the chromosome or '' if not mapped.
94
+ # Get the reference sequence name of the alignment. (a.k.a RNAME)
95
+ # '' if not mapped.
96
+ # @return [String] reference sequence name
91
97
  def chrom
92
98
  return "" if tid == -1
93
99
 
@@ -96,7 +102,9 @@ module HTS
96
102
 
97
103
  alias contig chrom
98
104
 
99
- # returns the chromosome of the mate or '' if not mapped.
105
+ # Get the reference sequence name of the mate.
106
+ # '' if not mapped.
107
+ # @return [String] reference sequence name
100
108
  def mate_chrom
101
109
  return "" if mtid == -1
102
110
 
@@ -105,12 +113,20 @@ module HTS
105
113
 
106
114
  alias mate_contig mate_chrom
107
115
 
108
- # Get strand information.
116
+ # Get whether the query is on the reverse strand.
117
+ # @return [String] strand "+" or "-"
109
118
  def strand
110
119
  LibHTS.bam_is_rev(@bam1) ? "-" : "+"
111
120
  end
112
121
 
113
- # insert size
122
+ # Get whether the query's mate is on the reverse strand.
123
+ # @return [String] strand "+" or "-"
124
+ def mate_strand
125
+ LibHTS.bam_is_mrev(@bam1) ? "-" : "+"
126
+ end
127
+
128
+ # Get the observed template length. (a.k.a TLEN)
129
+ # @return [Integer] isize
114
130
  def insert_size
115
131
  @bam1[:core][:isize]
116
132
  end
@@ -122,7 +138,8 @@ module HTS
122
138
  alias isize insert_size
123
139
  alias isize= insert_size=
124
140
 
125
- # mapping quality
141
+ # Get the mapping quality of the alignment. (a.k.a MAPQ)
142
+ # @return [Integer] mapping quality
126
143
  def mapq
127
144
  @bam1[:core][:qual]
128
145
  end
@@ -131,18 +148,24 @@ module HTS
131
148
  @bam1[:core][:qual] = mapq
132
149
  end
133
150
 
134
- # returns a `Cigar` object.
151
+ # Get the Bam::Cigar object.
152
+ # @return [Bam::Cigar] cigar
135
153
  def cigar
136
- Cigar.new(LibHTS.bam_get_cigar(@bam1), @bam1[:core][:n_cigar])
154
+ Cigar.new(self)
137
155
  end
138
156
 
157
+ # Calculate query length from CIGAR.
158
+ # @return [Integer] query length
139
159
  def qlen
160
+ # cigar.qlen will be slower because it converts to a Ruby array.
140
161
  LibHTS.bam_cigar2qlen(
141
162
  @bam1[:core][:n_cigar],
142
163
  LibHTS.bam_get_cigar(@bam1)
143
164
  )
144
165
  end
145
166
 
167
+ # Calculate reference length from CIGAR.
168
+ # @return [Integer] reference length
146
169
  def rlen
147
170
  LibHTS.bam_cigar2rlen(
148
171
  @bam1[:core][:n_cigar],
@@ -150,7 +173,8 @@ module HTS
150
173
  )
151
174
  end
152
175
 
153
- # return the read sequence
176
+ # Get the sequence. (a.k.a SEQ)
177
+ # @return [String] sequence
154
178
  def seq
155
179
  r = LibHTS.bam_get_seq(@bam1)
156
180
  seq = String.new
@@ -161,11 +185,15 @@ module HTS
161
185
  end
162
186
  alias sequence seq
163
187
 
188
+ # Get the length of the query sequence.
189
+ # @return [Integer] query length
164
190
  def len
165
191
  @bam1[:core][:l_qseq]
166
192
  end
167
193
 
168
- # return only the base of the requested index "i" of the query sequence.
194
+ # Get the base of the requested index "n" of the query sequence.
195
+ # @param [Integer] n index
196
+ # @return [String] base
169
197
  def base(n)
170
198
  n += @bam1[:core][:l_qseq] if n < 0
171
199
  return "." if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base(-1000)
@@ -174,13 +202,23 @@ module HTS
174
202
  SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
175
203
  end
176
204
 
177
- # return the base qualities
205
+ # Get the base qualities.
206
+ # @return [Array] base qualities
178
207
  def qual
179
208
  q_ptr = LibHTS.bam_get_qual(@bam1)
180
209
  q_ptr.read_array_of_uint8(@bam1[:core][:l_qseq])
181
210
  end
182
211
 
183
- # return only the base quality of the requested index "i" of the query sequence.
212
+ # Get the base qualities as a string. (a.k.a QUAL)
213
+ # ASCII of base quality + 33.
214
+ # @return [String] base qualities
215
+ def qual_string
216
+ qual.map { |q| (q + 33).chr }.join
217
+ end
218
+
219
+ # Get the base quality of the requested index "n" of the query sequence.
220
+ # @param [Integer] n index
221
+ # @return [Integer] base quality
184
222
  def base_qual(n)
185
223
  n += @bam1[:core][:l_qseq] if n < 0
186
224
  return 0 if (n >= @bam1[:core][:l_qseq]) || (n < 0) # eg. base_qual(-1000)
@@ -189,7 +227,8 @@ module HTS
189
227
  q_ptr.get_uint8(n)
190
228
  end
191
229
 
192
- # returns a `Flag` object.
230
+ # Get Bam::Flag object of the alignment.
231
+ # @return [Bam::Flag] flag
193
232
  def flag
194
233
  Flag.new(@bam1[:core][:flag])
195
234
  end
@@ -205,7 +244,9 @@ module HTS
205
244
  end
206
245
  end
207
246
 
208
- # retruns the auxillary fields.
247
+ # Get the auxiliary data.
248
+ # @param [String] key tag name
249
+ # @return [String] value
209
250
  def aux(key = nil)
210
251
  aux = Aux.new(self)
211
252
  if key
@@ -230,6 +271,7 @@ module HTS
230
271
  end
231
272
  end
232
273
 
274
+ # @return [String] a string representation of the alignment.
233
275
  def to_s
234
276
  kstr = LibHTS::KString.new
235
277
  raise "Failed to format bam record" if LibHTS.sam_format1(@header.struct, @bam1, kstr) == -1
data/lib/hts/bam.rb CHANGED
@@ -57,10 +57,9 @@ module HTS
57
57
  build_index(index) if build_index
58
58
  @idx = load_index(index)
59
59
  @start_position = tell
60
- super # do nothing
61
60
  end
62
61
 
63
- def build_index(index_name = nil, min_shift: 0)
62
+ def build_index(index_name = nil, min_shift: 0, threads: 2)
64
63
  check_closed
65
64
 
66
65
  if index_name
@@ -68,10 +67,15 @@ module HTS
68
67
  else
69
68
  warn "Create index for #{@file_name}"
70
69
  end
71
- r = LibHTS.sam_index_build3(@file_name, index_name, min_shift, @nthreads)
72
- raise "Failed to build index for #{@file_name}" if r < 0
73
-
74
- self
70
+ case LibHTS.sam_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
71
+ when 0 # successful
72
+ when -1 then raise "indexing failed"
73
+ when -2 then raise "opening #{@file_name} failed"
74
+ when -3 then raise "format not indexable"
75
+ when -4 then raise "failed to create and/or save the index"
76
+ else raise "unknown error"
77
+ end
78
+ self # for method chaining
75
79
  end
76
80
 
77
81
  def load_index(index_name = nil)
@@ -90,7 +94,6 @@ module HTS
90
94
  !@idx.null?
91
95
  end
92
96
 
93
- # Close the current file.
94
97
  def close
95
98
  LibHTS.hts_idx_destroy(@idx) if @idx&.null?
96
99
  @idx = nil
@@ -101,15 +104,23 @@ module HTS
101
104
  check_closed
102
105
 
103
106
  @header = header.dup
104
- LibHTS.hts_set_fai_filename(@hts_file, @file_name)
105
107
  LibHTS.sam_hdr_write(@hts_file, header)
106
108
  end
107
109
 
108
- def write(aln)
110
+ def header=(header)
111
+ write_header(header)
112
+ end
113
+
114
+ def write(record)
109
115
  check_closed
110
116
 
111
- aln_dup = aln.dup
112
- LibHTS.sam_write1(@hts_file, header, aln_dup) > 0 || raise
117
+ # record = record.dup
118
+ r = LibHTS.sam_write1(@hts_file, header, record)
119
+ raise "Failed to write record" if r < 0
120
+ end
121
+
122
+ def <<(aln)
123
+ write(aln)
113
124
  end
114
125
 
115
126
  def each(copy: false, &block)
@@ -120,29 +131,6 @@ module HTS
120
131
  end
121
132
  end
122
133
 
123
- private def each_record_copy
124
- check_closed
125
- return to_enum(__method__) unless block_given?
126
-
127
- while LibHTS.sam_read1(@hts_file, header, bam1 = LibHTS.bam_init1) != -1
128
- record = Record.new(bam1, header)
129
- yield record
130
- end
131
- self
132
- end
133
-
134
- private def each_record_reuse
135
- check_closed
136
- # Each does not always start at the beginning of the file.
137
- # This is the common behavior of IO objects in Ruby.
138
- return to_enum(__method__) unless block_given?
139
-
140
- bam1 = LibHTS.bam_init1
141
- record = Record.new(bam1, header)
142
- yield record while LibHTS.sam_read1(@hts_file, header, bam1) != -1
143
- self
144
- end
145
-
146
134
  def query(region, copy: false, &block)
147
135
  if copy
148
136
  query_copy(region, &block)
@@ -151,45 +139,6 @@ module HTS
151
139
  end
152
140
  end
153
141
 
154
- private def query_copy(region)
155
- check_closed
156
- raise "Index file is required to call the query method." unless index_loaded?
157
- return to_enum(__method__, region) unless block_given?
158
-
159
- qiter = LibHTS.sam_itr_querys(@idx, header, region)
160
-
161
- begin
162
- loop do
163
- bam1 = LibHTS.bam_init1
164
- slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
165
- break if slen == -1
166
- raise if slen < -1
167
-
168
- yield Record.new(bam1, header)
169
- end
170
- ensure
171
- LibHTS.hts_itr_destroy(qiter)
172
- end
173
- self
174
- end
175
-
176
- private def query_reuse(region)
177
- check_closed
178
- raise "Index file is required to call the query method." unless index_loaded?
179
- return to_enum(__method__, region) unless block_given?
180
-
181
- qiter = LibHTS.sam_itr_querys(@idx, header, region)
182
-
183
- bam1 = LibHTS.bam_init1
184
- record = Record.new(bam1, header)
185
- begin
186
- yield record while LibHTS.sam_itr_next(@hts_file, qiter, bam1) > 0
187
- ensure
188
- LibHTS.hts_itr_destroy(qiter)
189
- end
190
- self
191
- end
192
-
193
142
  # @!macro [attach] define_getter
194
143
  # @method $1
195
144
  # Get $1 array
@@ -247,5 +196,71 @@ module HTS
247
196
 
248
197
  self
249
198
  end
199
+
200
+ private
201
+
202
+ def query_reuse(region)
203
+ check_closed
204
+ raise "Index file is required to call the query method." unless index_loaded?
205
+ return to_enum(__method__, region) unless block_given?
206
+
207
+ qiter = LibHTS.sam_itr_querys(@idx, header, region)
208
+ raise "Failed to query region: #{region}" if qiter.null?
209
+
210
+ bam1 = LibHTS.bam_init1
211
+ record = Record.new(bam1, header)
212
+ begin
213
+ yield record while LibHTS.sam_itr_next(@hts_file, qiter, bam1) > 0
214
+ ensure
215
+ LibHTS.hts_itr_destroy(qiter)
216
+ end
217
+ self
218
+ end
219
+
220
+ def query_copy(region)
221
+ check_closed
222
+ raise "Index file is required to call the query method." unless index_loaded?
223
+ return to_enum(__method__, region) unless block_given?
224
+
225
+ qiter = LibHTS.sam_itr_querys(@idx, header, region)
226
+ raise "Failed to query region: #{region}" if qiter.null?
227
+
228
+ begin
229
+ loop do
230
+ bam1 = LibHTS.bam_init1
231
+ slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
232
+ break if slen == -1
233
+ raise if slen < -1
234
+
235
+ yield Record.new(bam1, header)
236
+ end
237
+ ensure
238
+ LibHTS.hts_itr_destroy(qiter)
239
+ end
240
+ self
241
+ end
242
+
243
+ def each_record_reuse
244
+ check_closed
245
+ # Each does not always start at the beginning of the file.
246
+ # This is the common behavior of IO objects in Ruby.
247
+ return to_enum(__method__) unless block_given?
248
+
249
+ bam1 = LibHTS.bam_init1
250
+ record = Record.new(bam1, header)
251
+ yield record while LibHTS.sam_read1(@hts_file, header, bam1) != -1
252
+ self
253
+ end
254
+
255
+ def each_record_copy
256
+ check_closed
257
+ return to_enum(__method__) unless block_given?
258
+
259
+ while LibHTS.sam_read1(@hts_file, header, bam1 = LibHTS.bam_init1) != -1
260
+ record = Record.new(bam1, header)
261
+ yield record
262
+ end
263
+ self
264
+ end
250
265
  end
251
266
  end
@@ -8,30 +8,10 @@ module HTS
8
8
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
9
  end
10
10
 
11
- # For compatibility with htslib.cr.
12
- def get_int(key)
13
- get(key, :int)
14
- end
15
-
16
- # For compatibility with htslib.cr.
17
- def get_float(key)
18
- get(key, :float)
19
- end
20
-
21
- # For compatibility with htslib.cr.
22
- def get_flag(key)
23
- get(key, :flag)
24
- end
25
-
26
- # For compatibility with htslib.cr.
27
- def get_string(key)
28
- get(key, :string)
29
- end
30
-
31
- def [](key)
32
- get(key)
33
- end
34
-
11
+ # @note: Why is this method named "get" instead of "fetch"?
12
+ # This is for compatibility with the Crystal language
13
+ # which provides methods like `get_int`, `get_float`, etc.
14
+ # I think they are better than `fetch_int` and `fetch_float`.
35
15
  def get(key, type = nil)
36
16
  n = FFI::MemoryPointer.new(:int)
37
17
  p1 = @p1
@@ -73,6 +53,30 @@ module HTS
73
53
  end
74
54
  end
75
55
 
56
+ # For compatibility with HTS.cr.
57
+ def get_int(key)
58
+ get(key, :int)
59
+ end
60
+
61
+ # For compatibility with HTS.cr.
62
+ def get_float(key)
63
+ get(key, :float)
64
+ end
65
+
66
+ # For compatibility with HTS.cr.
67
+ def get_flag(key)
68
+ get(key, :flag)
69
+ end
70
+
71
+ # For compatibility with HTS.cr.
72
+ def get_string(key)
73
+ get(key, :string)
74
+ end
75
+
76
+ def [](key)
77
+ get(key)
78
+ end
79
+
76
80
  def fields
77
81
  ids.map do |id|
78
82
  name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)