htslib 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/hts/bam.rb CHANGED
@@ -59,7 +59,7 @@ module HTS
59
59
  @start_position = tell
60
60
  end
61
61
 
62
- def build_index(index_name = nil, min_shift: 0)
62
+ def build_index(index_name = nil, min_shift: 0, threads: 2)
63
63
  check_closed
64
64
 
65
65
  if index_name
@@ -67,10 +67,15 @@ module HTS
67
67
  else
68
68
  warn "Create index for #{@file_name}"
69
69
  end
70
- r = LibHTS.sam_index_build3(@file_name, index_name, min_shift, @nthreads)
71
- raise "Failed to build index for #{@file_name}" if r < 0
72
-
73
- self
70
+ case LibHTS.sam_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
71
+ when 0 # successful
72
+ when -1 then raise "indexing failed"
73
+ when -2 then raise "opening #{@file_name} failed"
74
+ when -3 then raise "format not indexable"
75
+ when -4 then raise "failed to create and/or save the index"
76
+ else raise "unknown error"
77
+ end
78
+ self # for method chaining
74
79
  end
75
80
 
76
81
  def load_index(index_name = nil)
@@ -95,11 +100,6 @@ module HTS
95
100
  super
96
101
  end
97
102
 
98
- def fai=(fai)
99
- check_closed
100
- LibHTS.hts_set_fai_filename(@hts_file, fai) > 0 || raise
101
- end
102
-
103
103
  def write_header(header)
104
104
  check_closed
105
105
 
@@ -107,14 +107,22 @@ module HTS
107
107
  LibHTS.sam_hdr_write(@hts_file, header)
108
108
  end
109
109
 
110
- def write(aln)
110
+ def header=(header)
111
+ write_header(header)
112
+ end
113
+
114
+ def write(record)
111
115
  check_closed
112
116
 
113
- aln_dup = aln.dup
114
- r = LibHTS.sam_write1(@hts_file, header, aln_dup)
117
+ # record = record.dup
118
+ r = LibHTS.sam_write1(@hts_file, header, record)
115
119
  raise "Failed to write record" if r < 0
116
120
  end
117
121
 
122
+ def <<(aln)
123
+ write(aln)
124
+ end
125
+
118
126
  def each(copy: false, &block)
119
127
  if copy
120
128
  each_record_copy(&block)
@@ -8,30 +8,10 @@ module HTS
8
8
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
9
  end
10
10
 
11
- # For compatibility with HTS.cr.
12
- def get_int(key)
13
- get(key, :int)
14
- end
15
-
16
- # For compatibility with HTS.cr.
17
- def get_float(key)
18
- get(key, :float)
19
- end
20
-
21
- # For compatibility with HTS.cr.
22
- def get_flag(key)
23
- get(key, :flag)
24
- end
25
-
26
- # For compatibility with HTS.cr.
27
- def get_string(key)
28
- get(key, :string)
29
- end
30
-
31
- def [](key)
32
- get(key)
33
- end
34
-
11
+ # @note: Why is this method named "get" instead of "fetch"?
12
+ # This is for compatibility with the Crystal language
13
+ # which provides methods like `get_int`, `get_float`, etc.
14
+ # I think they are better than `fetch_int` and `fetch_float`.
35
15
  def get(key, type = nil)
36
16
  n = FFI::MemoryPointer.new(:int)
37
17
  p1 = @p1
@@ -73,6 +53,30 @@ module HTS
73
53
  end
74
54
  end
75
55
 
56
+ # For compatibility with HTS.cr.
57
+ def get_int(key)
58
+ get(key, :int)
59
+ end
60
+
61
+ # For compatibility with HTS.cr.
62
+ def get_float(key)
63
+ get(key, :float)
64
+ end
65
+
66
+ # For compatibility with HTS.cr.
67
+ def get_flag(key)
68
+ get(key, :flag)
69
+ end
70
+
71
+ # For compatibility with HTS.cr.
72
+ def get_string(key)
73
+ get(key, :string)
74
+ end
75
+
76
+ def [](key)
77
+ get(key)
78
+ end
79
+
76
80
  def fields
77
81
  ids.map do |id|
78
82
  name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)
data/lib/hts/bcf/info.rb CHANGED
@@ -9,31 +9,11 @@ module HTS
9
9
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
10
  end
11
11
 
12
- # For compatibility with HTS.cr.
13
- def get_int(key)
14
- get(key, :int)
15
- end
16
-
17
- # For compatibility with HTS.cr.
18
- def get_float(key)
19
- get(key, :float)
20
- end
21
-
22
- # For compatibility with HTS.cr.
23
- def get_string(key)
24
- get(key, :string)
25
- end
26
-
27
- # For compatibility with HTS.cr.
28
- def get_flag(key)
29
- get(key, :flag)
30
- end
31
-
32
- def [](key)
33
- get(key)
34
- end
35
-
36
12
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
13
+ # @note: Why is this method named "get" instead of "fetch"?
14
+ # This is for compatibility with the Crystal language
15
+ # which provides methods like `get_int`, `get_float`, etc.
16
+ # I think they are better than `fetch_int` and `fetch_float`.
37
17
  def get(key, type = nil)
38
18
  n = FFI::MemoryPointer.new(:int)
39
19
  p1 = @p1
@@ -70,6 +50,30 @@ module HTS
70
50
  end
71
51
  end
72
52
 
53
+ # For compatibility with HTS.cr.
54
+ def get_int(key)
55
+ get(key, :int)
56
+ end
57
+
58
+ # For compatibility with HTS.cr.
59
+ def get_float(key)
60
+ get(key, :float)
61
+ end
62
+
63
+ # For compatibility with HTS.cr.
64
+ def get_string(key)
65
+ get(key, :string)
66
+ end
67
+
68
+ # For compatibility with HTS.cr.
69
+ def get_flag(key)
70
+ get(key, :flag)
71
+ end
72
+
73
+ def [](key)
74
+ get(key)
75
+ end
76
+
73
77
  # FIXME: naming? room for improvement.
74
78
  def fields
75
79
  keys.map do |key|
data/lib/hts/bcf.rb CHANGED
@@ -54,7 +54,7 @@ module HTS
54
54
  @start_position = tell
55
55
  end
56
56
 
57
- def build_index(index_name = nil, min_shift: 14)
57
+ def build_index(index_name = nil, min_shift: 14, threads: 2)
58
58
  check_closed
59
59
 
60
60
  if index_name
@@ -62,10 +62,15 @@ module HTS
62
62
  else
63
63
  warn "Create index for #{@file_name}"
64
64
  end
65
- r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
66
- raise "Failed to build index for #{@file_name}" if r < 0
67
-
68
- self
65
+ case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
66
+ when 0 # successful
67
+ when -1 then raise "indexing failed"
68
+ when -2 then raise "opening #{@file_name} failed"
69
+ when -3 then raise "format not indexable"
70
+ when -4 then raise "failed to create and/or save the index"
71
+ else raise "unknown error"
72
+ end
73
+ self # for method chaining
69
74
  end
70
75
 
71
76
  def load_index(index_name = nil)
@@ -97,15 +102,21 @@ module HTS
97
102
  LibHTS.bcf_hdr_write(@hts_file, header)
98
103
  end
99
104
 
100
- def write(var)
105
+ def header=(header)
106
+ write_header(header)
107
+ end
108
+
109
+ def write(record)
101
110
  check_closed
102
111
 
103
- var_dup = var.dup
104
- r = LibHTS.bcf_write(@hts_file, header, var_dup)
112
+ # record = record.dup
113
+ r = LibHTS.bcf_write(@hts_file, header, record)
105
114
  raise "Failed to write record" if r < 0
106
115
  end
107
116
 
108
- # Close the current file.
117
+ def <<(var)
118
+ write(var)
119
+ end
109
120
 
110
121
  def nsamples
111
122
  check_closed
@@ -164,13 +175,13 @@ module HTS
164
175
  def info(key = nil)
165
176
  check_closed
166
177
  position = tell
167
- if key
168
- ary = map { |r| r.info(key) }
169
- else
170
- raise NotImplementedError
171
- # ary = each_copy.map { |r| r.info }
172
- # ary = map { |r| r.info.clone }
173
- end
178
+ raise NotImplementedError unless key
179
+
180
+ ary = map { |r| r.info(key) }
181
+
182
+ # ary = each_copy.map { |r| r.info }
183
+ # ary = map { |r| r.info.clone }
184
+
174
185
  seek(position)
175
186
  ary
176
187
  end
@@ -178,13 +189,13 @@ module HTS
178
189
  def format(key = nil)
179
190
  check_closed
180
191
  position = tell
181
- if key
182
- ary = map { |r| r.format(key) }
183
- else
184
- raise NotImplementedError
185
- # ary = each_copy.map { |r| r.format }
186
- # ary = map { |r| r.format.clone }
187
- end
192
+ raise NotImplementedError unless key
193
+
194
+ ary = map { |r| r.format(key) }
195
+
196
+ # ary = each_copy.map { |r| r.format }
197
+ # ary = map { |r| r.format.clone }
198
+
188
199
  seek(position)
189
200
  ary
190
201
  end
@@ -0,0 +1,64 @@
1
+ require_relative "../faidx"
2
+
3
+ module HTS
4
+ class Faidx
5
+ class Sequence
6
+ attr_reader :name, :faidx
7
+
8
+ def initialize(faidx, name)
9
+ raise unless faidx.has_key?(name)
10
+
11
+ @faidx = faidx
12
+ @name = name
13
+ end
14
+
15
+ def length
16
+ faidx.seq_len(name)
17
+ end
18
+ alias size length
19
+
20
+ def seq(start = nil, stop = nil)
21
+ faidx.seq(name, start, stop)
22
+ end
23
+
24
+ def qual(start = nil, stop = nil)
25
+ faidx.qual(name, start, stop)
26
+ end
27
+
28
+ def [](arg)
29
+ case arg
30
+ when Integer
31
+ if arg >= 0
32
+ start = arg
33
+ stop = arg
34
+ else
35
+ start = length + arg
36
+ stop = length + arg
37
+ end
38
+ when Range
39
+ arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
40
+ arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
41
+ if arg.begin.nil?
42
+ if arg.end.nil?
43
+ start = nil
44
+ stop = nil
45
+ else
46
+ start = 0
47
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
48
+ end
49
+ elsif arg.end.nil?
50
+ # always include the first base
51
+ start = arg.begin
52
+ stop = length - 1
53
+ else
54
+ start = arg.begin
55
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
56
+ end
57
+ else
58
+ raise ArgumentError
59
+ end
60
+ seq(start, stop)
61
+ end
62
+ end
63
+ end
64
+ end
data/lib/hts/faidx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../htslib"
4
+ require_relative "faidx/sequence"
4
5
 
5
6
  module HTS
6
7
  class Faidx
@@ -25,7 +26,11 @@ module HTS
25
26
  end
26
27
 
27
28
  @file_name = file_name
28
- @fai = LibHTS.fai_load(@file_name)
29
+ @fai = if [".fq", ".fastq"].include? File.extname(@file_name)
30
+ LibHTS.fai_load_format(@file_name, 2)
31
+ else
32
+ LibHTS.fai_load(@file_name)
33
+ end
29
34
 
30
35
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
36
  end
@@ -38,10 +43,9 @@ module HTS
38
43
  LibHTS.fai_destroy(@fai)
39
44
  end
40
45
 
41
- # FIXME: This doesn't seem to work as expected
42
- # def closed?
43
- # @fai.null?
44
- # end
46
+ def file_format
47
+ @fai[:format]
48
+ end
45
49
 
46
50
  # the number of sequences in the index.
47
51
  def length
@@ -50,31 +54,48 @@ module HTS
50
54
  alias size length
51
55
 
52
56
  # return the length of the requested chromosome.
53
- def chrom_size(chrom)
57
+ def names
58
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
59
+ end
60
+
61
+ alias keys names
62
+
63
+ def has_key?(key)
64
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
65
+
66
+ key = key.to_s
67
+ case LibHTS.faidx_has_seq(@fai, key)
68
+ when 1 then true
69
+ when 0 then false
70
+ else raise
71
+ end
72
+ end
73
+
74
+ def [](name)
75
+ name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
76
+ Sequence.new(self, name)
77
+ end
78
+
79
+ # return the length of the requested chromosome.
80
+ def seq_len(chrom)
54
81
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
55
82
 
56
83
  chrom = chrom.to_s
57
84
  result = LibHTS.faidx_seq_len(@fai, chrom)
58
85
  result == -1 ? nil : result
59
86
  end
60
- alias chrom_length chrom_size
61
87
 
62
- # return the length of the requested chromosome.
63
- def chrom_names
64
- Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
- end
66
-
67
- # @overload fetch(name)
88
+ # @overload seq(name)
68
89
  # Fetch the sequence as a String.
69
90
  # @param name [String] chr1:0-10
70
- # @overload fetch(name, start, stop)
91
+ # @overload seq(name, start, stop)
71
92
  # Fetch the sequence as a String.
72
93
  # @param name [String] the name of the chromosome
73
94
  # @param start [Integer] the start position of the sequence (0-based)
74
95
  # @param stop [Integer] the end position of the sequence (0-based)
75
96
  # @return [String] the sequence
76
97
 
77
- def seq(name, start = nil, stop = nil)
98
+ def fetch_seq(name, start = nil, stop = nil)
78
99
  name = name.to_s
79
100
  rlen = FFI::MemoryPointer.new(:int)
80
101
 
@@ -84,6 +105,7 @@ module HTS
84
105
  start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
106
  stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
107
  start > stop && raise(ArgumentError, "Expect start to be <= stop")
108
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
87
109
 
88
110
  result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
111
  end
@@ -95,5 +117,32 @@ module HTS
95
117
 
96
118
  result
97
119
  end
120
+
121
+ alias seq fetch_seq
122
+
123
+ def fetch_qual(name, start = nil, stop = nil)
124
+ name = name.to_s
125
+ rlen = FFI::MemoryPointer.new(:int)
126
+
127
+ if start.nil? && stop.nil?
128
+ result = LibHTS.fai_fetchqual(@fai, name, rlen)
129
+ else
130
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
131
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
132
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
133
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
134
+
135
+ result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
136
+ end
137
+
138
+ case rlen.read_int
139
+ when -2 then raise "Invalid chromosome name: #{name}"
140
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
141
+ end
142
+
143
+ result
144
+ end
145
+
146
+ alias qual fetch_qual
98
147
  end
99
148
  end
data/lib/hts/hts.rb CHANGED
@@ -69,6 +69,11 @@ module HTS
69
69
  @hts_file.nil? || @hts_file.null?
70
70
  end
71
71
 
72
+ def fai=(fai)
73
+ check_closed
74
+ LibHTS.hts_set_fai_filename(@hts_file, fai) > 0 || raise
75
+ end
76
+
72
77
  def set_threads(n = nil)
73
78
  if n.nil?
74
79
  require "etc"
@@ -107,14 +112,12 @@ module HTS
107
112
  end
108
113
 
109
114
  def rewind
110
- if @start_position
111
- r = seek(@start_position)
112
- raise "Failed to rewind: #{r}" if r < 0
115
+ raise "Cannot rewind: no start position" unless @start_position
113
116
 
114
- tell
115
- else
116
- raise "Cannot rewind: no start position"
117
- end
117
+ r = seek(@start_position)
118
+ raise "Failed to rewind: #{r}" if r < 0
119
+
120
+ tell
118
121
  end
119
122
 
120
123
  private
@@ -158,7 +158,7 @@ module HTS
158
158
  :specific, :pointer
159
159
  end
160
160
 
161
- class HtsIdx < FFI::Struct
161
+ class HtsIdx < FFI::Struct # FIXME: ManagedStruct
162
162
  layout \
163
163
  :fmt, :int,
164
164
  :min_shift, :int,
@@ -205,7 +205,7 @@ module HTS
205
205
  end
206
206
 
207
207
  # HtsFile
208
- class SamHdr < FFI::Struct
208
+ class SamHdr < FFI::ManagedStruct
209
209
  layout \
210
210
  :n_targets, :int32,
211
211
  :ignore_sam_err, :int32,
@@ -255,7 +255,7 @@ module HTS
255
255
 
256
256
  SamFile = HtsFile
257
257
 
258
- class HtsTpool < FFI::Struct
258
+ class HtsTpool < FFI::ManagedStruct
259
259
  layout \
260
260
  :pool, :pointer,
261
261
  :qsize, :int
@@ -277,7 +277,7 @@ module HTS
277
277
  :next, HtsOpt.ptr
278
278
  end
279
279
 
280
- class HtsItr < FFI::BitStruct
280
+ class HtsItr < FFI::BitStruct # FIXME: ManagedBitStruct
281
281
  layout \
282
282
  :_flags, :uint32, # bit_fields
283
283
  :tid, :int,
@@ -358,14 +358,14 @@ module HTS
358
358
  class BamPileupCd < FFI::Union
359
359
  layout \
360
360
  :p, :pointer,
361
- :i, :int64_t,
361
+ :i, :int64,
362
362
  :f, :double
363
363
  end
364
364
 
365
365
  class BamPileup1 < FFI::BitStruct
366
366
  layout \
367
367
  :b, Bam1.ptr,
368
- :qpos, :int32_t,
368
+ :qpos, :int32,
369
369
  :indel, :int,
370
370
  :level, :int,
371
371
  :_flags, :uint32, # bit_fields
@@ -391,9 +391,9 @@ module HTS
391
391
  :line_skip, :int32
392
392
  end
393
393
 
394
- class Tbx < FFI::Struct
394
+ class Tbx < FFI::ManagedStruct
395
395
  layout \
396
- :conf, TbxConf.ptr,
396
+ :conf, TbxConf,
397
397
  :idx, HtsIdx.ptr,
398
398
  :dict, :pointer
399
399
 
@@ -406,8 +406,8 @@ module HTS
406
406
 
407
407
  FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
408
408
 
409
- class Faidx < FFI::Struct
410
- layout :bgzf, BGZF,
409
+ class Faidx < FFI::Struct # FIXME: ManagedStruct
410
+ layout :bgzf, BGZF.ptr,
411
411
  :n, :int,
412
412
  :m, :int,
413
413
  :name, :pointer,
@@ -428,7 +428,7 @@ module HTS
428
428
  end
429
429
 
430
430
  # Complete textual representation of a header line
431
- class BcfHrec < FFI::Struct
431
+ class BcfHrec < FFI::ManagedStruct
432
432
  layout \
433
433
  :type, :int,
434
434
  :key, :string,
@@ -463,7 +463,7 @@ module HTS
463
463
 
464
464
  class BcfIdinfo < FFI::Struct
465
465
  layout \
466
- :info, [:uint64_t, 3],
466
+ :info, [:uint64, 3],
467
467
  :hrec, [BcfHrec.ptr, 3],
468
468
  :id, :int
469
469
  end
@@ -474,7 +474,7 @@ module HTS
474
474
  :val, BcfIdinfo.ptr
475
475
  end
476
476
 
477
- class BcfHdr < FFI::Struct
477
+ class BcfHdr < FFI::ManagedStruct
478
478
  layout \
479
479
  :n, [:int, 3],
480
480
  :id, [:pointer, 3], # BcfIdpair.ptr
@@ -536,7 +536,7 @@ module HTS
536
536
  layout \
537
537
  :pos, :hts_pos_t,
538
538
  :rlen, :hts_pos_t,
539
- :rid, :int32_t,
539
+ :rid, :int32,
540
540
  :qual, :float,
541
541
  :_n_info_allele, :uint32,
542
542
  :_n_fmt_sample, :uint32,
@@ -352,7 +352,7 @@ module HTS
352
352
  # As int32_decoded/encode, but from/to blocks instead of cram_fd
353
353
  attach_function \
354
354
  :int32_put_blk,
355
- %i[cram_block int32_t],
355
+ %i[cram_block int32],
356
356
  :int
357
357
 
358
358
  # Returns the refs_t structure used by a cram file handle.
@@ -242,13 +242,13 @@ module HTS
242
242
  [Bam1,
243
243
  :size_t,
244
244
  :string,
245
- :uint16_t,
246
- :int32_t,
245
+ :uint16,
246
+ :int32,
247
247
  :hts_pos_t,
248
- :uint8_t,
248
+ :uint8,
249
249
  :size_t,
250
250
  :string,
251
- :int32_t,
251
+ :int32,
252
252
  :hts_pos_t,
253
253
  :hts_pos_t,
254
254
  :size_t,
@@ -68,3 +68,5 @@ module HTS
68
68
  :void
69
69
  end
70
70
  end
71
+
72
+ require_relative "tbx_funcs"
@@ -8,7 +8,7 @@ module HTS
8
8
  end
9
9
 
10
10
  def tbx_itr_queryi(tbx, tid, beg, end_)
11
- hts_itr_query(tbx[:idx], tid, beg, end_, tbx_readrec)
11
+ hts_itr_query(tbx[:idx], tid, beg, end_, @@tbx_readrec)
12
12
  end
13
13
 
14
14
  def tbx_itr_querys(tbx, s)
@@ -469,7 +469,7 @@ module HTS
469
469
  attach_function \
470
470
  :bcf_fmt_sized_array,
471
471
  [KString, :pointer],
472
- :uint8_t
472
+ :uint8
473
473
 
474
474
  # Encode a variable-length char array in BCF format
475
475
  attach_function \