htslib 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bam.rb CHANGED
@@ -59,7 +59,7 @@ module HTS
59
59
  @start_position = tell
60
60
  end
61
61
 
62
- def build_index(index_name = nil, min_shift: 0)
62
+ def build_index(index_name = nil, min_shift: 0, threads: 2)
63
63
  check_closed
64
64
 
65
65
  if index_name
@@ -67,10 +67,15 @@ module HTS
67
67
  else
68
68
  warn "Create index for #{@file_name}"
69
69
  end
70
- r = LibHTS.sam_index_build3(@file_name, index_name, min_shift, @nthreads)
71
- raise "Failed to build index for #{@file_name}" if r < 0
72
-
73
- self
70
+ case LibHTS.sam_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
71
+ when 0 # successful
72
+ when -1 then raise "indexing failed"
73
+ when -2 then raise "opening #{@file_name} failed"
74
+ when -3 then raise "format not indexable"
75
+ when -4 then raise "failed to create and/or save the index"
76
+ else raise "unknown error"
77
+ end
78
+ self # for method chaining
74
79
  end
75
80
 
76
81
  def load_index(index_name = nil)
@@ -95,11 +100,6 @@ module HTS
95
100
  super
96
101
  end
97
102
 
98
- def fai=(fai)
99
- check_closed
100
- LibHTS.hts_set_fai_filename(@hts_file, fai) > 0 || raise
101
- end
102
-
103
103
  def write_header(header)
104
104
  check_closed
105
105
 
@@ -107,14 +107,22 @@ module HTS
107
107
  LibHTS.sam_hdr_write(@hts_file, header)
108
108
  end
109
109
 
110
- def write(aln)
110
+ def header=(header)
111
+ write_header(header)
112
+ end
113
+
114
+ def write(record)
111
115
  check_closed
112
116
 
113
- aln_dup = aln.dup
114
- r = LibHTS.sam_write1(@hts_file, header, aln_dup)
117
+ # record = record.dup
118
+ r = LibHTS.sam_write1(@hts_file, header, record)
115
119
  raise "Failed to write record" if r < 0
116
120
  end
117
121
 
122
+ def <<(aln)
123
+ write(aln)
124
+ end
125
+
118
126
  def each(copy: false, &block)
119
127
  if copy
120
128
  each_record_copy(&block)
@@ -8,30 +8,10 @@ module HTS
8
8
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
9
  end
10
10
 
11
- # For compatibility with HTS.cr.
12
- def get_int(key)
13
- get(key, :int)
14
- end
15
-
16
- # For compatibility with HTS.cr.
17
- def get_float(key)
18
- get(key, :float)
19
- end
20
-
21
- # For compatibility with HTS.cr.
22
- def get_flag(key)
23
- get(key, :flag)
24
- end
25
-
26
- # For compatibility with HTS.cr.
27
- def get_string(key)
28
- get(key, :string)
29
- end
30
-
31
- def [](key)
32
- get(key)
33
- end
34
-
11
+ # @note: Why is this method named "get" instead of "fetch"?
12
+ # This is for compatibility with the Crystal language
13
+ # which provides methods like `get_int`, `get_float`, etc.
14
+ # I think they are better than `fetch_int` and `fetch_float`.
35
15
  def get(key, type = nil)
36
16
  n = FFI::MemoryPointer.new(:int)
37
17
  p1 = @p1
@@ -73,6 +53,30 @@ module HTS
73
53
  end
74
54
  end
75
55
 
56
+ # For compatibility with HTS.cr.
57
+ def get_int(key)
58
+ get(key, :int)
59
+ end
60
+
61
+ # For compatibility with HTS.cr.
62
+ def get_float(key)
63
+ get(key, :float)
64
+ end
65
+
66
+ # For compatibility with HTS.cr.
67
+ def get_flag(key)
68
+ get(key, :flag)
69
+ end
70
+
71
+ # For compatibility with HTS.cr.
72
+ def get_string(key)
73
+ get(key, :string)
74
+ end
75
+
76
+ def [](key)
77
+ get(key)
78
+ end
79
+
76
80
  def fields
77
81
  ids.map do |id|
78
82
  name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)
data/lib/hts/bcf/info.rb CHANGED
@@ -9,31 +9,11 @@ module HTS
9
9
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
10
10
  end
11
11
 
12
- # For compatibility with HTS.cr.
13
- def get_int(key)
14
- get(key, :int)
15
- end
16
-
17
- # For compatibility with HTS.cr.
18
- def get_float(key)
19
- get(key, :float)
20
- end
21
-
22
- # For compatibility with HTS.cr.
23
- def get_string(key)
24
- get(key, :string)
25
- end
26
-
27
- # For compatibility with HTS.cr.
28
- def get_flag(key)
29
- get(key, :flag)
30
- end
31
-
32
- def [](key)
33
- get(key)
34
- end
35
-
36
12
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
13
+ # @note: Why is this method named "get" instead of "fetch"?
14
+ # This is for compatibility with the Crystal language
15
+ # which provides methods like `get_int`, `get_float`, etc.
16
+ # I think they are better than `fetch_int` and `fetch_float`.
37
17
  def get(key, type = nil)
38
18
  n = FFI::MemoryPointer.new(:int)
39
19
  p1 = @p1
@@ -70,6 +50,30 @@ module HTS
70
50
  end
71
51
  end
72
52
 
53
+ # For compatibility with HTS.cr.
54
+ def get_int(key)
55
+ get(key, :int)
56
+ end
57
+
58
+ # For compatibility with HTS.cr.
59
+ def get_float(key)
60
+ get(key, :float)
61
+ end
62
+
63
+ # For compatibility with HTS.cr.
64
+ def get_string(key)
65
+ get(key, :string)
66
+ end
67
+
68
+ # For compatibility with HTS.cr.
69
+ def get_flag(key)
70
+ get(key, :flag)
71
+ end
72
+
73
+ def [](key)
74
+ get(key)
75
+ end
76
+
73
77
  # FIXME: naming? room for improvement.
74
78
  def fields
75
79
  keys.map do |key|
data/lib/hts/bcf.rb CHANGED
@@ -54,7 +54,7 @@ module HTS
54
54
  @start_position = tell
55
55
  end
56
56
 
57
- def build_index(index_name = nil, min_shift: 14)
57
+ def build_index(index_name = nil, min_shift: 14, threads: 2)
58
58
  check_closed
59
59
 
60
60
  if index_name
@@ -62,10 +62,15 @@ module HTS
62
62
  else
63
63
  warn "Create index for #{@file_name}"
64
64
  end
65
- r = LibHTS.bcf_index_build3(@file_name, index_name, min_shift, @nthreads)
66
- raise "Failed to build index for #{@file_name}" if r < 0
67
-
68
- self
65
+ case LibHTS.bcf_index_build3(@file_name, index_name, min_shift, (@nthreads || threads))
66
+ when 0 # successful
67
+ when -1 then raise "indexing failed"
68
+ when -2 then raise "opening #{@file_name} failed"
69
+ when -3 then raise "format not indexable"
70
+ when -4 then raise "failed to create and/or save the index"
71
+ else raise "unknown error"
72
+ end
73
+ self # for method chaining
69
74
  end
70
75
 
71
76
  def load_index(index_name = nil)
@@ -97,15 +102,21 @@ module HTS
97
102
  LibHTS.bcf_hdr_write(@hts_file, header)
98
103
  end
99
104
 
100
- def write(var)
105
+ def header=(header)
106
+ write_header(header)
107
+ end
108
+
109
+ def write(record)
101
110
  check_closed
102
111
 
103
- var_dup = var.dup
104
- r = LibHTS.bcf_write(@hts_file, header, var_dup)
112
+ # record = record.dup
113
+ r = LibHTS.bcf_write(@hts_file, header, record)
105
114
  raise "Failed to write record" if r < 0
106
115
  end
107
116
 
108
- # Close the current file.
117
+ def <<(var)
118
+ write(var)
119
+ end
109
120
 
110
121
  def nsamples
111
122
  check_closed
@@ -164,13 +175,13 @@ module HTS
164
175
  def info(key = nil)
165
176
  check_closed
166
177
  position = tell
167
- if key
168
- ary = map { |r| r.info(key) }
169
- else
170
- raise NotImplementedError
171
- # ary = each_copy.map { |r| r.info }
172
- # ary = map { |r| r.info.clone }
173
- end
178
+ raise NotImplementedError unless key
179
+
180
+ ary = map { |r| r.info(key) }
181
+
182
+ # ary = each_copy.map { |r| r.info }
183
+ # ary = map { |r| r.info.clone }
184
+
174
185
  seek(position)
175
186
  ary
176
187
  end
@@ -178,13 +189,13 @@ module HTS
178
189
  def format(key = nil)
179
190
  check_closed
180
191
  position = tell
181
- if key
182
- ary = map { |r| r.format(key) }
183
- else
184
- raise NotImplementedError
185
- # ary = each_copy.map { |r| r.format }
186
- # ary = map { |r| r.format.clone }
187
- end
192
+ raise NotImplementedError unless key
193
+
194
+ ary = map { |r| r.format(key) }
195
+
196
+ # ary = each_copy.map { |r| r.format }
197
+ # ary = map { |r| r.format.clone }
198
+
188
199
  seek(position)
189
200
  ary
190
201
  end
@@ -0,0 +1,64 @@
1
+ require_relative "../faidx"
2
+
3
+ module HTS
4
+ class Faidx
5
+ class Sequence
6
+ attr_reader :name, :faidx
7
+
8
+ def initialize(faidx, name)
9
+ raise unless faidx.has_key?(name)
10
+
11
+ @faidx = faidx
12
+ @name = name
13
+ end
14
+
15
+ def length
16
+ faidx.seq_len(name)
17
+ end
18
+ alias size length
19
+
20
+ def seq(start = nil, stop = nil)
21
+ faidx.seq(name, start, stop)
22
+ end
23
+
24
+ def qual(start = nil, stop = nil)
25
+ faidx.qual(name, start, stop)
26
+ end
27
+
28
+ def [](arg)
29
+ case arg
30
+ when Integer
31
+ if arg >= 0
32
+ start = arg
33
+ stop = arg
34
+ else
35
+ start = length + arg
36
+ stop = length + arg
37
+ end
38
+ when Range
39
+ arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
40
+ arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
41
+ if arg.begin.nil?
42
+ if arg.end.nil?
43
+ start = nil
44
+ stop = nil
45
+ else
46
+ start = 0
47
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
48
+ end
49
+ elsif arg.end.nil?
50
+ # always include the first base
51
+ start = arg.begin
52
+ stop = length - 1
53
+ else
54
+ start = arg.begin
55
+ stop = arg.exclude_end? ? arg.end - 1 : arg.end
56
+ end
57
+ else
58
+ raise ArgumentError
59
+ end
60
+ seq(start, stop)
61
+ end
62
+ end
63
+ end
64
+ end
data/lib/hts/faidx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "../htslib"
4
+ require_relative "faidx/sequence"
4
5
 
5
6
  module HTS
6
7
  class Faidx
@@ -25,7 +26,11 @@ module HTS
25
26
  end
26
27
 
27
28
  @file_name = file_name
28
- @fai = LibHTS.fai_load(@file_name)
29
+ @fai = if [".fq", ".fastq"].include? File.extname(@file_name)
30
+ LibHTS.fai_load_format(@file_name, 2)
31
+ else
32
+ LibHTS.fai_load(@file_name)
33
+ end
29
34
 
30
35
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
36
  end
@@ -38,10 +43,9 @@ module HTS
38
43
  LibHTS.fai_destroy(@fai)
39
44
  end
40
45
 
41
- # FIXME: This doesn't seem to work as expected
42
- # def closed?
43
- # @fai.null?
44
- # end
46
+ def file_format
47
+ @fai[:format]
48
+ end
45
49
 
46
50
  # the number of sequences in the index.
47
51
  def length
@@ -50,31 +54,48 @@ module HTS
50
54
  alias size length
51
55
 
52
56
  # return the length of the requested chromosome.
53
- def chrom_size(chrom)
57
+ def names
58
+ Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
59
+ end
60
+
61
+ alias keys names
62
+
63
+ def has_key?(key)
64
+ raise ArgumentError, "Expect chrom to be String or Symbol" unless key.is_a?(String) || key.is_a?(Symbol)
65
+
66
+ key = key.to_s
67
+ case LibHTS.faidx_has_seq(@fai, key)
68
+ when 1 then true
69
+ when 0 then false
70
+ else raise
71
+ end
72
+ end
73
+
74
+ def [](name)
75
+ name = LibHTS.faidx_iseq(@fai, name) if name.is_a?(Integer)
76
+ Sequence.new(self, name)
77
+ end
78
+
79
+ # return the length of the requested chromosome.
80
+ def seq_len(chrom)
54
81
  raise ArgumentError, "Expect chrom to be String or Symbol" unless chrom.is_a?(String) || chrom.is_a?(Symbol)
55
82
 
56
83
  chrom = chrom.to_s
57
84
  result = LibHTS.faidx_seq_len(@fai, chrom)
58
85
  result == -1 ? nil : result
59
86
  end
60
- alias chrom_length chrom_size
61
87
 
62
- # return the length of the requested chromosome.
63
- def chrom_names
64
- Array.new(length) { |i| LibHTS.faidx_iseq(@fai, i) }
65
- end
66
-
67
- # @overload fetch(name)
88
+ # @overload seq(name)
68
89
  # Fetch the sequence as a String.
69
90
  # @param name [String] chr1:0-10
70
- # @overload fetch(name, start, stop)
91
+ # @overload seq(name, start, stop)
71
92
  # Fetch the sequence as a String.
72
93
  # @param name [String] the name of the chromosome
73
94
  # @param start [Integer] the start position of the sequence (0-based)
74
95
  # @param stop [Integer] the end position of the sequence (0-based)
75
96
  # @return [String] the sequence
76
97
 
77
- def seq(name, start = nil, stop = nil)
98
+ def fetch_seq(name, start = nil, stop = nil)
78
99
  name = name.to_s
79
100
  rlen = FFI::MemoryPointer.new(:int)
80
101
 
@@ -84,6 +105,7 @@ module HTS
84
105
  start < 0 && raise(ArgumentError, "Expect start to be >= 0")
85
106
  stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
86
107
  start > stop && raise(ArgumentError, "Expect start to be <= stop")
108
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
87
109
 
88
110
  result = LibHTS.faidx_fetch_seq(@fai, name, start, stop, rlen)
89
111
  end
@@ -95,5 +117,32 @@ module HTS
95
117
 
96
118
  result
97
119
  end
120
+
121
+ alias seq fetch_seq
122
+
123
+ def fetch_qual(name, start = nil, stop = nil)
124
+ name = name.to_s
125
+ rlen = FFI::MemoryPointer.new(:int)
126
+
127
+ if start.nil? && stop.nil?
128
+ result = LibHTS.fai_fetchqual(@fai, name, rlen)
129
+ else
130
+ start < 0 && raise(ArgumentError, "Expect start to be >= 0")
131
+ stop < 0 && raise(ArgumentError, "Expect stop to be >= 0")
132
+ start > stop && raise(ArgumentError, "Expect start to be <= stop")
133
+ stop >= seq_len(name) && raise(ArgumentError, "Expect stop to be < seq_len")
134
+
135
+ result = LibHTS.faidx_fetch_qual(@fai, name, start, stop, rlen)
136
+ end
137
+
138
+ case rlen.read_int
139
+ when -2 then raise "Invalid chromosome name: #{name}"
140
+ when -1 then raise "Error fetching sequence: #{name}:#{start}-#{stop}"
141
+ end
142
+
143
+ result
144
+ end
145
+
146
+ alias qual fetch_qual
98
147
  end
99
148
  end
data/lib/hts/hts.rb CHANGED
@@ -69,6 +69,11 @@ module HTS
69
69
  @hts_file.nil? || @hts_file.null?
70
70
  end
71
71
 
72
+ def fai=(fai)
73
+ check_closed
74
+ LibHTS.hts_set_fai_filename(@hts_file, fai) > 0 || raise
75
+ end
76
+
72
77
  def set_threads(n = nil)
73
78
  if n.nil?
74
79
  require "etc"
@@ -107,14 +112,12 @@ module HTS
107
112
  end
108
113
 
109
114
  def rewind
110
- if @start_position
111
- r = seek(@start_position)
112
- raise "Failed to rewind: #{r}" if r < 0
115
+ raise "Cannot rewind: no start position" unless @start_position
113
116
 
114
- tell
115
- else
116
- raise "Cannot rewind: no start position"
117
- end
117
+ r = seek(@start_position)
118
+ raise "Failed to rewind: #{r}" if r < 0
119
+
120
+ tell
118
121
  end
119
122
 
120
123
  private
@@ -158,7 +158,7 @@ module HTS
158
158
  :specific, :pointer
159
159
  end
160
160
 
161
- class HtsIdx < FFI::Struct
161
+ class HtsIdx < FFI::Struct # FIXME: ManagedStruct
162
162
  layout \
163
163
  :fmt, :int,
164
164
  :min_shift, :int,
@@ -205,7 +205,7 @@ module HTS
205
205
  end
206
206
 
207
207
  # HtsFile
208
- class SamHdr < FFI::Struct
208
+ class SamHdr < FFI::ManagedStruct
209
209
  layout \
210
210
  :n_targets, :int32,
211
211
  :ignore_sam_err, :int32,
@@ -255,7 +255,7 @@ module HTS
255
255
 
256
256
  SamFile = HtsFile
257
257
 
258
- class HtsTpool < FFI::Struct
258
+ class HtsTpool < FFI::ManagedStruct
259
259
  layout \
260
260
  :pool, :pointer,
261
261
  :qsize, :int
@@ -277,7 +277,7 @@ module HTS
277
277
  :next, HtsOpt.ptr
278
278
  end
279
279
 
280
- class HtsItr < FFI::BitStruct
280
+ class HtsItr < FFI::BitStruct # FIXME: ManagedBitStruct
281
281
  layout \
282
282
  :_flags, :uint32, # bit_fields
283
283
  :tid, :int,
@@ -358,14 +358,14 @@ module HTS
358
358
  class BamPileupCd < FFI::Union
359
359
  layout \
360
360
  :p, :pointer,
361
- :i, :int64_t,
361
+ :i, :int64,
362
362
  :f, :double
363
363
  end
364
364
 
365
365
  class BamPileup1 < FFI::BitStruct
366
366
  layout \
367
367
  :b, Bam1.ptr,
368
- :qpos, :int32_t,
368
+ :qpos, :int32,
369
369
  :indel, :int,
370
370
  :level, :int,
371
371
  :_flags, :uint32, # bit_fields
@@ -391,9 +391,9 @@ module HTS
391
391
  :line_skip, :int32
392
392
  end
393
393
 
394
- class Tbx < FFI::Struct
394
+ class Tbx < FFI::ManagedStruct
395
395
  layout \
396
- :conf, TbxConf.ptr,
396
+ :conf, TbxConf,
397
397
  :idx, HtsIdx.ptr,
398
398
  :dict, :pointer
399
399
 
@@ -406,8 +406,8 @@ module HTS
406
406
 
407
407
  FaiFormatOptions = enum(:FAI_NONE, :FAI_FASTA, :FAI_FASTQ)
408
408
 
409
- class Faidx < FFI::Struct
410
- layout :bgzf, BGZF,
409
+ class Faidx < FFI::Struct # FIXME: ManagedStruct
410
+ layout :bgzf, BGZF.ptr,
411
411
  :n, :int,
412
412
  :m, :int,
413
413
  :name, :pointer,
@@ -428,7 +428,7 @@ module HTS
428
428
  end
429
429
 
430
430
  # Complete textual representation of a header line
431
- class BcfHrec < FFI::Struct
431
+ class BcfHrec < FFI::ManagedStruct
432
432
  layout \
433
433
  :type, :int,
434
434
  :key, :string,
@@ -463,7 +463,7 @@ module HTS
463
463
 
464
464
  class BcfIdinfo < FFI::Struct
465
465
  layout \
466
- :info, [:uint64_t, 3],
466
+ :info, [:uint64, 3],
467
467
  :hrec, [BcfHrec.ptr, 3],
468
468
  :id, :int
469
469
  end
@@ -474,7 +474,7 @@ module HTS
474
474
  :val, BcfIdinfo.ptr
475
475
  end
476
476
 
477
- class BcfHdr < FFI::Struct
477
+ class BcfHdr < FFI::ManagedStruct
478
478
  layout \
479
479
  :n, [:int, 3],
480
480
  :id, [:pointer, 3], # BcfIdpair.ptr
@@ -536,7 +536,7 @@ module HTS
536
536
  layout \
537
537
  :pos, :hts_pos_t,
538
538
  :rlen, :hts_pos_t,
539
- :rid, :int32_t,
539
+ :rid, :int32,
540
540
  :qual, :float,
541
541
  :_n_info_allele, :uint32,
542
542
  :_n_fmt_sample, :uint32,
@@ -352,7 +352,7 @@ module HTS
352
352
  # As int32_decoded/encode, but from/to blocks instead of cram_fd
353
353
  attach_function \
354
354
  :int32_put_blk,
355
- %i[cram_block int32_t],
355
+ %i[cram_block int32],
356
356
  :int
357
357
 
358
358
  # Returns the refs_t structure used by a cram file handle.
@@ -242,13 +242,13 @@ module HTS
242
242
  [Bam1,
243
243
  :size_t,
244
244
  :string,
245
- :uint16_t,
246
- :int32_t,
245
+ :uint16,
246
+ :int32,
247
247
  :hts_pos_t,
248
- :uint8_t,
248
+ :uint8,
249
249
  :size_t,
250
250
  :string,
251
- :int32_t,
251
+ :int32,
252
252
  :hts_pos_t,
253
253
  :hts_pos_t,
254
254
  :size_t,
@@ -68,3 +68,5 @@ module HTS
68
68
  :void
69
69
  end
70
70
  end
71
+
72
+ require_relative "tbx_funcs"
@@ -8,7 +8,7 @@ module HTS
8
8
  end
9
9
 
10
10
  def tbx_itr_queryi(tbx, tid, beg, end_)
11
- hts_itr_query(tbx[:idx], tid, beg, end_, tbx_readrec)
11
+ hts_itr_query(tbx[:idx], tid, beg, end_, @@tbx_readrec)
12
12
  end
13
13
 
14
14
  def tbx_itr_querys(tbx, s)
@@ -469,7 +469,7 @@ module HTS
469
469
  attach_function \
470
470
  :bcf_fmt_sized_array,
471
471
  [KString, :pointer],
472
- :uint8_t
472
+ :uint8
473
473
 
474
474
  # Encode a variable-length char array in BCF format
475
475
  attach_function \