htslib 0.0.5 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,18 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Header
6
- def initialize(h)
7
- @h = h
6
+ def initialize(hts_file)
7
+ @bcf_hdr = LibHTS.bcf_hdr_read(hts_file)
8
8
  end
9
9
 
10
10
  def struct
11
- @h
11
+ @bcf_hdr
12
12
  end
13
13
 
14
14
  def to_ptr
15
- @h.to_ptr
15
+ @bcf_hdr.to_ptr
16
+ end
17
+
18
+ def get_version
19
+ LibHTS.bcf_hdr_get_version(@bcf_hdr)
20
+ end
21
+
22
+ def nsamples
23
+ LibHTS.bcf_hdr_nsamples(@bcf_hdr)
24
+ end
25
+
26
+ def samples
27
+ # bcf_hdr_id2name is macro function
28
+ @bcf_hdr[:samples]
29
+ .read_array_of_pointer(nsamples)
30
+ .map(&:read_string)
31
+ end
32
+
33
+ def to_s
34
+ kstr = LibHTS::KString.new
35
+ raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
36
+
37
+ kstr[:s]
38
+ end
39
+
40
+ private
41
+
42
+ def initialize_copy(orig)
43
+ @bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
16
44
  end
17
45
  end
18
46
  end
data/lib/hts/bcf/info.rb CHANGED
@@ -1,17 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Info
6
6
  def initialize(record)
7
7
  @record = record
8
+ @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
+ end
10
+
11
+ # For compatibility with htslib.cr.
12
+ def get_int(key)
13
+ get(key, :int)
14
+ end
15
+
16
+ # For compatibility with htslib.cr.
17
+ def get_float(key)
18
+ get(key, :float)
19
+ end
20
+
21
+ # For compatibility with htslib.cr.
22
+ def get_string(key)
23
+ get(key, :string)
24
+ end
25
+
26
+ # For compatibility with htslib.cr.
27
+ def get_flag(key)
28
+ get(key, :flag)
8
29
  end
9
30
 
10
31
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
11
32
  def get(key, type = nil)
12
33
  n = FFI::MemoryPointer.new(:int)
13
- p1 = @record.p1
14
- h = @record.bcf.header.struct
34
+ p1 = @p1
35
+ h = @record.header.struct
15
36
  r = @record.struct
16
37
 
17
38
  info_values = proc do |type|
@@ -44,6 +65,7 @@ module HTS
44
65
  end
45
66
  end
46
67
 
68
+ # FIXME: naming? room for improvement.
47
69
  def fields
48
70
  n_info = @record.struct[:n_info]
49
71
  Array.new(n_info) do |i|
@@ -53,10 +75,10 @@ module HTS
53
75
  )
54
76
  {
55
77
  name: LibHTS.bcf_hdr_int2id(
56
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
78
+ @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
57
79
  ),
58
80
  n: LibHTS.bcf_hdr_id2number(
59
- @record.bcf.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
81
+ @record.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
60
82
  ),
61
83
  vtype: fld[:type], i: fld[:key]
62
84
  }
@@ -72,7 +94,7 @@ module HTS
72
94
  i * LibHTS::BcfInfo.size
73
95
  )
74
96
  id = LibHTS.bcf_hdr_int2id(
75
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
97
+ @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
76
98
  )
77
99
  return fld[:type] if id == key
78
100
  end
@@ -1,22 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Record
6
- def initialize(bcf_t, bcf)
6
+ def initialize(bcf_t, header)
7
7
  @bcf1 = bcf_t
8
- @bcf = bcf
9
- @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
8
+ @header = header
10
9
  end
11
10
 
12
- attr_reader :p1, :bcf
11
+ attr_reader :header
13
12
 
14
13
  def struct
15
14
  @bcf1
16
15
  end
17
16
 
18
17
  def to_ptr
19
- @bcf.to_ptr
18
+ @bcf1.to_ptr
20
19
  end
21
20
 
22
21
  # def inspect; end
@@ -26,10 +25,9 @@ module HTS
26
25
  def genotypes; end
27
26
 
28
27
  def chrom
29
- hdr = @bcf.header.struct
30
28
  rid = @bcf1[:rid]
31
29
 
32
- LibHTS.bcf_hdr_id2name(hdr, rid)
30
+ LibHTS.bcf_hdr_id2name(@header.struct, rid)
33
31
  end
34
32
 
35
33
  def pos
@@ -59,11 +57,13 @@ module HTS
59
57
  "PASS"
60
58
  when 1
61
59
  i = d[:flt].read_int
62
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
60
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
63
61
  when 2
64
62
  d[:flt].get_array_of_int(0, n_flt).map do |i|
65
- LibHTS.bcf_hdr_int2id(@bcf.header.struct, LibHTS::BCF_DT_ID, i)
63
+ LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
66
64
  end
65
+ else
66
+ raise "Unexpected number of filters. n_flt: #{n_flt}"
67
67
  end
68
68
  end
69
69
 
@@ -80,13 +80,14 @@ module HTS
80
80
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
81
81
  @bcf1[:d][:allele].get_array_of_pointer(
82
82
  FFI::TYPE_POINTER.size, @bcf1[:n_allele] - 1
83
- ).map { |c| c.read_string }
83
+ ).map(&:read_string)
84
84
  end
85
85
 
86
86
  def alleles
87
+ LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
87
88
  @bcf1[:d][:allele].get_array_of_pointer(
88
89
  0, @bcf1[:n_allele]
89
- ).map { |c| c.read_string }
90
+ ).map(&:read_string)
90
91
  end
91
92
 
92
93
  def info
@@ -101,10 +102,17 @@ module HTS
101
102
 
102
103
  def to_s
103
104
  ksr = LibHTS::KString.new
104
- raise "Failed to format record" if LibHTS.vcf_format(@bcf.header.struct, @bcf1, ksr) == -1
105
+ raise "Failed to format record" if LibHTS.vcf_format(@header.struct, @bcf1, ksr) == -1
105
106
 
106
107
  ksr[:s]
107
108
  end
109
+
110
+ private
111
+
112
+ def initialize_copy(orig)\
113
+ @header = orig.header
114
+ @bcf1 = LibHTS.bcf_dup(orig.struct)
115
+ end
108
116
  end
109
117
  end
110
118
  end
data/lib/hts/bcf.rb CHANGED
@@ -3,71 +3,109 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "../htslib"
7
+
8
+ require_relative "hts"
6
9
  require_relative "bcf/header"
7
10
  require_relative "bcf/info"
8
11
  require_relative "bcf/format"
9
12
  require_relative "bcf/record"
10
13
 
11
14
  module HTS
12
- class Bcf
15
+ class Bcf < Hts
13
16
  include Enumerable
14
17
 
15
- attr_reader :file_path, :mode, :header
16
- # HtfFile is FFI::BitStruct
17
- attr_reader :htf_file
18
+ attr_reader :file_name, :index_path, :mode, :header
18
19
 
19
- class << self
20
- alias open new
21
- end
20
+ def self.open(*args, **kw)
21
+ file = new(*args, **kw) # do not yield
22
+ return file unless block_given?
22
23
 
23
- def initialize(file_path, mode = "r")
24
- file_path = File.expand_path(file_path)
24
+ begin
25
+ yield file
26
+ ensure
27
+ file.close
28
+ end
29
+ file
30
+ end
25
31
 
26
- unless File.exist?(file_path)
27
- message = "No such VCF/BCF file - #{file_path}"
32
+ def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
33
+ create_index: false)
34
+ if block_given?
35
+ message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
28
36
  raise message
29
37
  end
30
38
 
31
- @file_path = file_path
39
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
40
+
41
+ @file_name = file_name
32
42
  @mode = mode
33
- @htf_file = LibHTS.hts_open(file_path, mode)
34
- @header = Bcf::Header.new(LibHTS.bcf_hdr_read(htf_file))
43
+ @hts_file = LibHTS.hts_open(@file_name, mode)
35
44
 
36
- # IO like API
37
- if block_given?
38
- begin
39
- yield self
40
- ensure
41
- close
42
- end
45
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
46
+
47
+ if threads&.> 0
48
+ r = LibHTS.hts_set_threads(@hts_file, threads)
49
+ raise "Failed to set number of threads: #{threads}" if r < 0
43
50
  end
51
+
52
+ return if @mode[0] == "w"
53
+
54
+ @header = Bcf::Header.new(@hts_file)
44
55
  end
45
56
 
46
- def struct
47
- htf_file
57
+ def write_header
58
+ @header = header.dup
59
+ LibHTS.hts_set_fai_filename(header, @file_name)
60
+ LibHTS.bcf_hdr_write(@hts_file, header.struct)
48
61
  end
49
62
 
50
- def to_ptr
51
- htf_file.to_ptr
63
+ def write(var)
64
+ var_dup = var.dup = var.dup
65
+ LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
52
66
  end
53
67
 
54
68
  # Close the current file.
55
69
  def close
56
- LibHTS.hts_close(htf_file)
70
+ LibHTS.hts_close(@hts_file)
71
+ @hts_file = nil
57
72
  end
58
73
 
59
- def each
74
+ def closed?
75
+ @hts_file.nil?
76
+ end
77
+
78
+ def nsamples
79
+ header.nsamples
80
+ end
81
+
82
+ def samples
83
+ header.samples
84
+ end
85
+
86
+ # Iterate over each record.
87
+ # Generate a new Record object each time.
88
+ # Slower than each.
89
+ def each_copy
60
90
  return to_enum(__method__) unless block_given?
61
91
 
62
- while LibHTS.bcf_read(htf_file, header, bcf1 = LibHTS.bcf_init) != -1
63
- record = Record.new(bcf1, self)
92
+ while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
93
+ record = Record.new(bcf1, header)
64
94
  yield record
65
95
  end
66
96
  self
67
97
  end
68
98
 
69
- def n_samples
70
- LibHTS.bcf_hdr_nsamples(header.struct)
99
+ # Iterate over each record.
100
+ # Record object is reused.
101
+ # Faster than each_copy.
102
+ def each
103
+ return to_enum(__method__) unless block_given?
104
+
105
+ bcf1 = LibHTS.bcf_init
106
+ record = Record.new(bcf1, header)
107
+ yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
108
+ self
71
109
  end
72
110
  end
73
111
  end
data/lib/hts/faidx.rb CHANGED
@@ -3,17 +3,19 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
+ require_relative "../htslib"
7
+
6
8
  module HTS
7
9
  class Faidx
8
- attr_reader :file_path
10
+ attr_reader :file_name
9
11
 
10
12
  class << self
11
13
  alias open new
12
14
  end
13
15
 
14
- def initialize(file_path)
15
- @file_path = File.expand_path(file_path)
16
- @fai = LibHTS.fai_load(file_path)
16
+ def initialize(file_name)
17
+ @file_name = file_name
18
+ @fai = LibHTS.fai_load(@file_name)
17
19
 
18
20
  # IO like API
19
21
  if block_given?
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'ffi/bit_field'
3
+ require "ffi/bit_field"
4
4
 
5
5
  module FFI
6
6
  class Struct
data/lib/hts/hts.rb ADDED
@@ -0,0 +1,56 @@
1
+ require_relative "../htslib"
2
+
3
+ module HTS
4
+ class Hts
5
+ def struct
6
+ @hts_file
7
+ end
8
+
9
+ def to_ptr
10
+ @hts_file.to_ptr
11
+ end
12
+
13
+ def format
14
+ LibHTS.hts_get_format(@hts_file)[:format].to_s
15
+ end
16
+
17
+ def format_version
18
+ v = LibHTS.hts_get_format(@hts_file)[:version]
19
+ major = v[:major]
20
+ minor = v[:minor]
21
+ if minor == -1
22
+ major.to_s
23
+ else
24
+ "#{major}.#{minor}"
25
+ end
26
+ end
27
+
28
+ def seek(offset)
29
+ if @hts_file[:is_cram] == 1
30
+ LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
31
+ elsif @hts_file[:is_bgzf] == 1
32
+ LibHTS.bgzf_seek(@hts_file[:fp][:bgzf], offset, IO::SEEK_SET)
33
+ else
34
+ LibHTS.hseek(@hts_file[:fp][:hfile], offset, IO::SEEK_SET)
35
+ end
36
+ end
37
+
38
+ def tell
39
+ if @hts_file[:is_cram] == 1
40
+ # LibHTS.cram_tell(@hts_file[:fp][:cram])
41
+ # warn 'cram_tell is not implemented in c htslib'
42
+ nil
43
+ elsif @hts_file[:is_bgzf] == 1
44
+ LibHTS.bgzf_tell(@hts_file[:fp][:bgzf])
45
+ else
46
+ LibHTS.htell(@hts_file[:fp][:hfile])
47
+ end
48
+ end
49
+
50
+ def rewind
51
+ r = seek(@start_position) if @start_position
52
+ raise "Failed to rewind: #{r}" if r < 0
53
+ r
54
+ end
55
+ end
56
+ end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- %i[HFILE string],
21
+ [HFILE, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [:HFILE],
27
+ [HFILE],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- %i[HFILE pointer size_t],
33
+ [HFILE, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -75,6 +75,11 @@ module HTS
75
75
  [BGZF],
76
76
  :int
77
77
 
78
+ # Return a virtual file pointer to the current location in the file.
79
+ def self.bgzf_tell(fp)
80
+ (fp[:block_address] << 16) | (fp[:block_offset] & 0xFFFF)
81
+ end
82
+
78
83
  # Set the file to read from the location specified by _pos_.
79
84
  attach_function \
80
85
  :bgzf_seek,
@@ -176,7 +181,7 @@ module HTS
176
181
  # Load BGZF index from an hFILE
177
182
  attach_function \
178
183
  :bgzf_index_load_hfile,
179
- [BGZF, :HFILE, :string],
184
+ [BGZF, HFILE, :string],
180
185
  :int
181
186
 
182
187
  # Save BGZF index
@@ -188,7 +193,7 @@ module HTS
188
193
  # Write a BGZF index to an hFILE
189
194
  attach_function \
190
195
  :bgzf_index_dump_hfile,
191
- [BGZF, :HFILE, :string],
196
+ [BGZF, HFILE, :string],
192
197
  :int
193
198
  end
194
199
  end
@@ -2,7 +2,6 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- typedef :pointer, :HFILE
6
5
  typedef :int64, :hts_pos_t
7
6
  typedef :pointer, :bam_plp_auto_f
8
7
 
@@ -25,6 +24,25 @@ module HTS
25
24
  :f, :pointer # kstream_t
26
25
  end
27
26
 
27
+ # HFILE
28
+
29
+ class HFILE < FFI::BitStruct
30
+ layout \
31
+ :buffer, :string,
32
+ :begin, :string,
33
+ :end, :string,
34
+ :limit, :string,
35
+ :backend, :pointer,
36
+ :offset, :size_t,
37
+ :_flags, :uint,
38
+ :has_errno, :int
39
+
40
+ bit_fields :_flags,
41
+ :at_eof, 1,
42
+ :mobile, 1,
43
+ :readonly, 1
44
+ end
45
+
28
46
  # BGZF
29
47
  class BGZF < FFI::BitStruct
30
48
  layout \
@@ -38,7 +56,7 @@ module HTS
38
56
  :uncompressed_block, :pointer,
39
57
  :compressed_block, :pointer,
40
58
  :cache, :pointer,
41
- :fp, :HFILE,
59
+ :fp, HFILE.ptr,
42
60
  :mt, :pointer,
43
61
  :idx, :pointer,
44
62
  :idx_build_otf, :int,
@@ -198,8 +216,8 @@ module HTS
198
216
  :fp,
199
217
  union_layout(
200
218
  :bgzf, BGZF.ptr,
201
- :cram, :pointer,
202
- :hfile, :pointer # HFILE
219
+ :cram, :pointer, # cram_fd
220
+ :hfile, HFILE.ptr
203
221
  ),
204
222
  :state, :pointer,
205
223
  :format, HtsFormat,
@@ -218,7 +236,7 @@ module HTS
218
236
 
219
237
  SamFile = HtsFile
220
238
 
221
- class HtsThreadPool < FFI::Struct
239
+ class HtsTpool < FFI::Struct
222
240
  layout \
223
241
  :pool, :pointer,
224
242
  :qsize, :int