htslib 0.0.5 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Header
6
- def initialize(h)
7
- @h = h
6
+ def initialize(hts_file)
7
+ @bcf_hdr = LibHTS.bcf_hdr_read(hts_file)
8
8
  end
9
9
 
10
10
  def struct
11
- @h
11
+ @bcf_hdr
12
12
  end
13
13
 
14
14
  def to_ptr
15
- @h.to_ptr
15
+ @bcf_hdr.to_ptr
16
+ end
17
+
18
+ def get_version
19
+ LibHTS.bcf_hdr_get_version(@bcf_hdr)
20
+ end
21
+
22
+ def nsamples
23
+ LibHTS.bcf_hdr_nsamples(@bcf_hdr)
24
+ end
25
+
26
+ def samples
27
+ # bcf_hdr_id2name is a macro function
28
+ @bcf_hdr[:samples]
29
+ .read_array_of_pointer(nsamples)
30
+ .map(&:read_string)
31
+ end
32
+
33
# Format the header as text.
#
# Calls LibHTS.bcf_hdr_format with a fresh KString and returns the
# KString's :s field.
#
# @return the :s field of the KString filled by bcf_hdr_format
# @raise [RuntimeError] if bcf_hdr_format reports a negative status
def to_s
  kstr = LibHTS::KString.new
  # The status is an Integer; every Integer (including 0 and -1) is truthy
  # in Ruby, so it must be compared explicitly rather than used as a boolean.
  status = LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
  raise "Failed to get header string" if status.negative?

  kstr[:s]
end
39
+
40
+ private
41
+
42
+ def initialize_copy(orig)
43
+ @bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
16
44
  end
17
45
  end
18
46
  end
data/lib/hts/bcf/info.rb CHANGED
@@ -1,17 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Info
6
6
  def initialize(record)
7
7
  @record = record
8
+ @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
+ end
10
+
11
+ # For compatibility with htslib.cr.
12
+ def get_int(key)
13
+ get(key, :int)
14
+ end
15
+
16
+ # For compatibility with htslib.cr.
17
+ def get_float(key)
18
+ get(key, :float)
19
+ end
20
+
21
+ # For compatibility with htslib.cr.
22
+ def get_string(key)
23
+ get(key, :string)
24
+ end
25
+
26
+ # For compatibility with htslib.cr.
27
+ def get_flag(key)
28
+ get(key, :flag)
8
29
  end
9
30
 
10
31
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
11
32
  def get(key, type = nil)
12
33
  n = FFI::MemoryPointer.new(:int)
13
- p1 = @record.p1
14
- h = @record.bcf.header.struct
34
+ p1 = @p1
35
+ h = @record.header.struct
15
36
  r = @record.struct
16
37
 
17
38
  info_values = proc do |type|
@@ -44,6 +65,7 @@ module HTS
44
65
  end
45
66
  end
46
67
 
68
+ # FIXME: naming? room for improvement.
47
69
  def fields
48
70
  n_info = @record.struct[:n_info]
49
71
  Array.new(n_info) do |i|
@@ -53,10 +75,10 @@ module HTS
53
75
  )
54
76
  {
55
77
  name: LibHTS.bcf_hdr_int2id(
56
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
78
+ @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
57
79
  ),
58
80
  n: LibHTS.bcf_hdr_id2number(
59
- @record.bcf.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
81
+ @record.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
60
82
  ),
61
83
  vtype: fld[:type], i: fld[:key]
62
84
  }
@@ -72,7 +94,7 @@ module HTS
72
94
  i * LibHTS::BcfInfo.size
73
95
  )
74
96
  id = LibHTS.bcf_hdr_int2id(
75
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
97
+ @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
76
98
  )
77
99
  return fld[:type] if id == key
78
100
  end
@@ -1,22 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Record
6
- def initialize(bcf_t, bcf)
6
+ def initialize(bcf_t, header)
7
7
  @bcf1 = bcf_t
8
- @bcf = bcf
9
- @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
8
+ @header = header
10
9
  end
11
10
 
12
- attr_reader :p1, :bcf
11
+ attr_reader :header
13
12
 
14
13
  def struct
15
14
  @bcf1
16
15
  end
17
16
 
18
17
  def to_ptr
19
- @bcf.to_ptr
18
+ @bcf1.to_ptr
20
19
  end
21
20
 
22
21
  # def inspect; end
@@ -26,10 +25,9 @@ module HTS
26
25
  def genotypes; end
27
26
 
28
27
  def chrom
29
- hdr = @bcf.header.struct
30
28
  rid = @bcf1[:rid]
31
29
 
32
- LibHTS.bcf_hdr_id2name(hdr, rid)
30
+ LibHTS.bcf_hdr_id2name(@header.struct, rid)
33
31
  end
34
32
 
35
33
  def pos
@@ -59,11 +57,13 @@ module HTS
59
57
  "PASS"
60
58
  when 1
61
59
  i = d[:flt].read_int
62
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
60
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
63
61
  when 2
64
62
  d[:flt].get_array_of_int(0, n_flt).map do |i|
65
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
63
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
66
64
  end
65
+ else
66
+ raise "Unexpected number of filters. n_flt: #{n_flt}"
67
67
  end
68
68
  end
69
69
 
@@ -80,13 +80,14 @@ module HTS
80
80
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
81
81
  @bcf1[:d][:allele].get_array_of_pointer(
82
82
  FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
83
- ).map { |c| c.read_string }
83
+ ).map(&:read_string)
84
84
  end
85
85
 
86
86
  def alleles
87
+ LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
87
88
  @bcf1[:d][:allele].get_array_of_pointer(
88
89
  0, @bcf1[:n_allele]
89
- ).map { |c| c.read_string }
90
+ ).map(&:read_string)
90
91
  end
91
92
 
92
93
  def info
@@ -101,10 +102,17 @@ module HTS
101
102
 
102
103
  def to_s
103
104
  ksr = LibHTS::KString.new
104
- raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
105
+ raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
105
106
 
106
107
  ksr[:s]
107
108
  end
109
+
110
+ private
111
+
112
# Hook run by #dup / #clone.
#
# The copy shares the original's header object and receives the result of
# LibHTS.bcf_dup applied to the original's record struct.
#
# @param orig the record being copied (must respond to #header and #struct)
def initialize_copy(orig)
  @header = orig.header
  @bcf1 = LibHTS.bcf_dup(orig.struct)
end
108
116
  end
109
117
  end
110
118
  end
data/lib/hts/bcf.rb CHANGED
@@ -3,71 +3,109 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "../htslib"
7
+
8
+ require_relative "hts"
6
9
  require_relative "bcf/header"
7
10
  require_relative "bcf/info"
8
11
  require_relative "bcf/format"
9
12
  require_relative "bcf/record"
10
13
 
11
14
  module HTS
12
- class Bcf
15
+ class Bcf < Hts
13
16
  include Enumerable
14
17
 
15
- attr_reader :file_path, :mode, :header
16
- # HtfFile is FFI::BitStruct
17
- attr_reader :htf_file
18
+ attr_reader :file_name, :index_path, :mode, :header
18
19
 
19
- class << self
20
- alias open new
21
- end
20
+ def self.open(*args, **kw)
21
+ file = new(*args, **kw) # do not yield
22
+ return file unless block_given?
22
23
 
23
- def initialize(file_path, mode = "r")
24
- file_path = File.expand_path(file_path)
24
+ begin
25
+ yield file
26
+ ensure
27
+ file.close
28
+ end
29
+ file
30
+ end
25
31
 
26
- unless File.exist?(file_path)
27
- message = "No such VCF/BCF file - #{file_path}"
32
+ def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
33
+ create_index: false)
34
+ if block_given?
35
+ message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
28
36
  raise message
29
37
  end
30
38
 
31
- @file_path = file_path
39
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
40
+
41
+ @file_name = file_name
32
42
  @mode = mode
33
- @htf_file = LibHTS.hts_open(file_path, mode)
34
- @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))
43
+ @hts_file = LibHTS.hts_open(@file_name, mode)
35
44
 
36
- # IO like API
37
- if block_given?
38
- begin
39
- yield self
40
- ensure
41
- close
42
- end
45
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
46
+
47
+ if threads&.> 0
48
+ r = LibHTS.hts_set_threads(@hts_file, threads)
49
+ raise "Failed to set number of threads: #{threads}" if r < 0
43
50
  end
51
+
52
+ return if @mode[0] == "w"
53
+
54
+ @header = Bcf::Header.new(@hts_file)
44
55
  end
45
56
 
46
- def struct
47
- htf_file
57
# Write the header to the open file.
#
# Replaces @header with a duplicate of the current header, registers
# @file_name through hts_set_fai_filename, then writes the header struct
# with bcf_hdr_write.
#
# @return the value returned by LibHTS.bcf_hdr_write
def write_header
  @header = header.dup
  # hts_set_fai_filename operates on the file handle, not on the header.
  LibHTS.hts_set_fai_filename(@hts_file, @file_name)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
49
62
 
50
- def to_ptr
51
- htf_file.to_ptr
63
# Write one record to the open file.
#
# The record is duplicated first and the duplicate is handed to
# LibHTS.bcf_write together with the file handle and the header.
#
# @param var the record to write (must respond to #dup)
# @return [true] when the write did not report an error
# @raise [RuntimeError] if bcf_write returns a negative status
def write(var)
  var_dup = var.dup
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  # A status of 0 means success; only negative values signal failure.
  raise "Failed to write record" if status.negative?

  true
end
53
67
 
54
68
  # Close the current file.
55
69
  def close
56
- LibHTS.hts_close(htf_file)
70
+ LibHTS.hts_close(@hts_file)
71
+ @hts_file = nil
57
72
  end
58
73
 
59
- def each
74
+ def closed?
75
+ @hts_file.nil?
76
+ end
77
+
78
+ def nsamples
79
+ header.nsamples
80
+ end
81
+
82
+ def samples
83
+ header.samples
84
+ end
85
+
86
+ # Iterate over each record.
87
+ # Generate a new Record object each time.
88
+ # Slower than each.
89
+ def each_copy
60
90
  return to_enum(__method__) unless block_given?
61
91
 
62
- while LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init) != -1
63
- record = Record.new(bcf1, self)
92
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
93
+ record = Record.new(bcf1, header)
64
94
  yield record
65
95
  end
66
96
  self
67
97
  end
68
98
 
69
- def n_samples
70
- LibHTS.bcf_hdr_nsamples(header.struct)
99
+ # Iterate over each record.
100
+ # Record object is reused.
101
+ # Faster than each_copy.
102
+ def each
103
+ return to_enum(__method__) unless block_given?
104
+
105
+ bcf1 = LibHTS.bcf_init
106
+ record = Record.new(bcf1, header)
107
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
108
+ self
71
109
  end
72
110
  end
73
111
  end
data/lib/hts/faidx.rb CHANGED
@@ -3,17 +3,19 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "../htslib"
7
+
6
8
  module HTS
7
9
  class Faidx
8
- attr_reader :file_path
10
+ attr_reader :file_name
9
11
 
10
12
  class << self
11
13
  alias open new
12
14
  end
13
15
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
16
+ def initialize(file_name)
17
+ @file_name = file_name
18
+ @fai = LibHTS.fai_load(@file_name)
17
19
 
18
20
  # IO like API
19
21
  if block_given?
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'ffi/bit_field'
3
+ require "ffi/bit_field"
4
4
 
5
5
  module FFI
6
6
  class Struct
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,56 @@
1
+ require_relative "../htslib"
2
+
3
+ module HTS
4
# Shared behaviour for objects that hold an open file handle in @hts_file.
#
# This class does not assign @hts_file or @start_position itself; they are
# presumably set by subclasses (verify against the subclass initializers).
class Hts
  # @return the value stored in @hts_file
  def struct
    @hts_file
  end

  # @return the result of calling #to_ptr on @hts_file
  def to_ptr
    @hts_file.to_ptr
  end

  # @return [String] the :format field reported by hts_get_format, as a string
  def format
    LibHTS.hts_get_format(@hts_file)[:format].to_s
  end

  # Version string built from the :version field reported by hts_get_format.
  #
  # @return [String] "major" when the minor field is -1, else "major.minor"
  def format_version
    version = LibHTS.hts_get_format(@hts_file)[:version]
    major = version[:major]
    minor = version[:minor]
    return major.to_s if minor == -1

    "#{major}.#{minor}"
  end

  # Move to +offset+ (relative to the start, IO::SEEK_SET), dispatching on
  # the file's :is_cram / :is_bgzf flags to the matching seek function.
  #
  # @param offset the position to seek to
  # @return whatever the selected LibHTS seek function returns
  def seek(offset)
    if @hts_file[:is_cram] == 1
      LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
    elsif @hts_file[:is_bgzf] == 1
      LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET)
    else
      LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
    end
  end

  # Report the current position.
  #
  # @return the position from bgzf_tell or htell; nil when :is_cram is 1,
  #   because no tell call is made for that case
  def tell
    if @hts_file[:is_cram] == 1
      # LibHTS.cram_tell(@hts_file[:fp][:cram])
      # warn 'cram_tell is not implemented in c htslib'
      nil
    elsif @hts_file[:is_bgzf] == 1
      LibHTS.bgzf_tell(@hts_file[:fp][:bgzf])
    else
      LibHTS.htell(@hts_file[:fp][:hfile])
    end
  end

  # Seek back to @start_position.
  #
  # @return the non-negative value returned by #seek
  # @raise [RuntimeError] if @start_position is not set, or if #seek
  #   returns a negative value
  def rewind
    # Without this guard the status would be nil and `nil < 0` would fail
    # with an unrelated NoMethodError.
    raise "Cannot rewind: start position is unknown" unless @start_position

    r = seek(@start_position)
    raise "Failed to rewind: #{r}" if r < 0

    r
  end
end
56
+ end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFILE, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFILE],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFILE, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
+ # Return a virtual file pointer to the current location in the file.
79
+ def self.bgzf_tell(fp)
80
+ (fp[:block_address] << 16) | (fp[:block_offset] & 0xFFFF)
81
+ end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFILE, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFILE, :string],
192
197
  :int
193
198
  end
194
199
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- typedef :pointer, :HFILE
6
5
  typedef :int64, :hts_pos_t
7
6
  typedef :pointer, :bam_plp_auto_f
8
7
 
@@ -25,6 +24,25 @@ module HTS
25
24
  :f, :pointer # kstream_t
26
25
  end
27
26
 
27
+ # HFILE
28
+
29
+ class HFILE < FFI::BitStruct
30
+ layout \
31
+ :buffer, :string,
32
+ :begin, :string,
33
+ :end, :string,
34
+ :limit, :string,
35
+ :backend, :pointer,
36
+ :offset, :size_t,
37
+ :_flags, :uint,
38
+ :has_errno, :int
39
+
40
+ bit_fields :_flags,
41
+ :at_eof, 1,
42
+ :mobile, 1,
43
+ :readonly, 1
44
+ end
45
+
28
46
  # BGZF
29
47
  class BGZF < FFI::BitStruct
30
48
  layout \
@@ -38,7 +56,7 @@ module HTS
38
56
  :uncompressed_block, :pointer,
39
57
  :compressed_block, :pointer,
40
58
  :cache, :pointer,
41
- :fp, :HFILE,
59
+ :fp, HFILE.ptr,
42
60
  :mt, :pointer,
43
61
  :idx, :pointer,
44
62
  :idx_build_otf, :int,
@@ -198,8 +216,8 @@ module HTS
198
216
  :fp,
199
217
  union_layout(
200
218
  :bgzf, BGZF.ptr,
201
- :cram, :pointer,
202
- :hfile, :pointer # HFILE
219
+ :cram, :pointer, # cram_fd
220
+ :hfile, HFILE.ptr
203
221
  ),
204
222
  :state, :pointer,
205
223
  :format, HtsFormat,
@@ -218,7 +236,7 @@ module HTS
218
236
 
219
237
  SamFile = HtsFile
220
238
 
221
- class HtsThreadPool < FFI::Struct
239
+ class HtsTpool < FFI::Struct
222
240
  layout \
223
241
  :pool, :pointer,
224
242
  :qsize, :int