htslib 0.0.10 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf/info.rb CHANGED
@@ -28,6 +28,10 @@ module HTS
28
28
  get(key, :flag)
29
29
  end
30
30
 
31
# Hash-style lookup of a single INFO field.
#
# Forwards to #get without a type hint, so #get has to determine the
# field type by itself.
#
# @param key INFO tag to look up (presumably a String; confirm against callers)
# @return whatever #get returns for +key+
def [](key)
  get(key, nil)
end
34
+
31
35
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
32
36
  def get(key, type = nil)
33
37
  n = FFI::MemoryPointer.new(:int)
@@ -35,14 +39,14 @@ module HTS
35
39
  h = @record.header.struct
36
40
  r = @record.struct
37
41
 
38
- info_values = proc do |type|
39
- ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, type)
42
+ info_values = proc do |typ|
43
+ ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
40
44
  return nil if ret < 0 # return from method.
41
45
 
42
46
  p1.read_pointer
43
47
  end
44
48
 
45
- type ||= info_type_to_string(get_info_type(key))
49
+ type ||= ht_type_to_sym(get_info_type(key))
46
50
 
47
51
  case type&.to_sym
48
52
  when :int, :int32
@@ -67,47 +71,68 @@ module HTS
67
71
 
68
72
  # FIXME: naming? room for improvement.
69
73
  def fields
70
- n_info = @record.struct[:n_info]
71
- Array.new(n_info) do |i|
72
- fld = LibHTS::BcfInfo.new(
73
- @record.struct[:d][:info] +
74
- i * LibHTS::BcfInfo.size
75
- )
74
+ keys.map do |key|
75
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
76
+ num = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key)
77
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key)
76
78
  {
77
- name: LibHTS.bcf_hdr_int2id(
78
- @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
79
- ),
80
- n: LibHTS.bcf_hdr_id2number(
81
- @record.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
82
- ),
83
- vtype: fld[:type], i: fld[:key]
79
+ name: name,
80
+ n: num,
81
+ type: ht_type_to_sym(type),
82
+ key: key
84
83
  }
85
84
  end
86
85
  end
87
86
 
87
# Number of INFO fields, read from the +n_info+ member of the record struct.
def length
  record_struct = @record.struct
  record_struct[:n_info]
end
90
+
91
# Alternative name for #length; returns exactly what #length returns.
def size
  length
end
94
+
95
# Collect every INFO field of the record into a Hash.
#
# Each numeric key from #keys is translated to its tag name through
# +bcf_hdr_int2id+, and the value is fetched with #get (no type hint).
#
# @return [Hash] tag name => value returned by #get
def to_h
  keys.each_with_object({}) do |key, fields|
    tag = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
    fields[tag] = get(tag)
  end
end
103
+
88
104
  private
89
105
 
106
# Raw pointer to the INFO data of the record: the +info+ member of the
# struct's +d+ member, converted with +to_ptr+.
def info_ptr
  unpacked = @record.struct[:d]
  unpacked[:info].to_ptr
end
109
+
110
# Numeric keys of all INFO fields in the record.
#
# Reads #length BcfInfo structs starting at #info_ptr and returns the
# +key+ member of each one, in order.
#
# @return [Array] one key per INFO field
def keys
  entries = info_ptr.read_array_of_struct(LibHTS::BcfInfo, length)
  entries.map { |entry| entry[:key] }
end
115
+
90
116
  def get_info_type(key)
91
117
  @record.struct[:n_info].times do |i|
92
- fld = LibHTS::BcfInfo.new(
93
- @record.struct[:d][:info] +
94
- i * LibHTS::BcfInfo.size
95
- )
96
- id = LibHTS.bcf_hdr_int2id(
97
- @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
98
- )
99
- return fld[:type] if id == key
118
+ info = LibHTS::BcfInfo.new(@record.struct[:d][:info] + i * LibHTS::BcfInfo.size)
119
+ k = info[:key]
120
+ id = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, k)
121
+ if id == key
122
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, k)
123
+ return type
124
+ end
100
125
  end
126
+ nil
101
127
  end
102
128
 
103
- def info_type_to_string(t)
129
+ def ht_type_to_sym(t)
104
130
  case t
105
- when 0 then :flag
106
- when 1, 2, 3, 4 then :int
107
- when 5 then :float
108
- when 7 then :string
109
- else
110
- raise "Unknown info type: #{t}"
131
+ when LibHTS::BCF_HT_FLAG then :flag
132
+ when LibHTS::BCF_HT_INT then :int
133
+ when LibHTS::BCF_HT_REAL then :float
134
+ when LibHTS::BCF_HT_STR then :string
135
+ when LibHTS::BCF_HT_LONG then :float
111
136
  end
112
137
  end
113
138
  end
@@ -18,35 +18,48 @@ module HTS
18
18
  @bcf1.to_ptr
19
19
  end
20
20
 
21
- # def inspect; end
22
-
23
- def formats; end
21
# Get the reference id of the record.
#
# This is the raw +rid+ member of the underlying bcf1 struct; #chrom
# turns it into a name.
def rid
  record = @bcf1
  record[:rid]
end
24
25
 
25
- def genotypes; end
26
# Set the reference id of the record by writing the +rid+ member of the
# underlying bcf1 struct.
#
# @param rid new reference id
def rid=(rid)
  record = @bcf1
  record[:rid] = rid
end
26
29
 
30
+ # Get the chromosome name of the variant.
27
31
  def chrom
28
- rid = @bcf1[:rid]
29
-
30
32
  LibHTS.bcf_hdr_id2name(@header.struct, rid)
31
33
  end
32
34
 
35
+ # Return 0-based position.
33
36
  def pos
34
- @bcf1[:pos] + 1 # FIXME
37
+ @bcf1[:pos]
35
38
  end
36
39
 
37
- def start
38
- @bcf1[:pos]
40
+ def pos=(pos)
41
+ @bcf1[:pos] = pos
39
42
  end
40
43
 
41
- def stop
42
- @bcf1[:pos] + @bcf1[:rlen]
44
+ # Return the 0-based, exclusive end position
45
+ def endpos
46
+ pos + @bcf1[:rlen]
43
47
  end
44
48
 
49
# Return the value of the ID column.
#
# The record is unpacked with +bcf_unpack+ (BCF_UN_INFO) first, then the
# +id+ member of the +d+ member is read.
def id
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
  unpacked = @bcf1[:d]
  unpacked[:id]
end
49
54
 
55
# Replace the ID column of the record via +bcf_update_id+.
#
# @param id new ID value
# @return the value returned by +bcf_update_id+ (when invoked as a plain
#   method call; an assignment expression evaluates to +id+ itself)
def id=(id)
  status = LibHTS.bcf_update_id(@header, @bcf1, id)
  status
end
58
+
59
# Reset the ID column by updating it to "." via +bcf_update_id+.
#
# @return the value returned by +bcf_update_id+
def clear_id
  status = LibHTS.bcf_update_id(@header, @bcf1, ".")
  status
end
62
+
50
63
  def filter
51
64
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
52
65
  d = @bcf1[:d]
@@ -58,7 +71,7 @@ module HTS
58
71
  when 1
59
72
  i = d[:flt].read_int
60
73
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
61
- when 2
74
+ when 2..nil
62
75
  d[:flt].get_array_of_int(0, n_flt).map do |i|
63
76
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
64
77
  end
@@ -67,10 +80,15 @@ module HTS
67
80
  end
68
81
  end
69
82
 
83
# Get variant quality.
#
# Reads the +qual+ member of the underlying bcf1 struct as-is.
def qual
  record = @bcf1
  record[:qual]
end
73
87
 
88
# Set variant quality by writing the +qual+ member of the underlying
# bcf1 struct.
#
# @param qual new quality value
def qual=(qual)
  record = @bcf1
  record[:qual] = qual
end
91
+
74
92
  def ref
75
93
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
76
94
  @bcf1[:d][:allele].get_pointer(0).read_string
@@ -90,14 +108,23 @@ module HTS
90
108
  ).map(&:read_string)
91
109
  end
92
110
 
93
- def info
111
+ def info(key = nil)
94
112
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
95
- Info.new(self)
113
+ info = Info.new(self)
114
+ if key
115
+ info.get(key)
116
+ else
117
+ info
118
+ end
96
119
  end
97
120
 
98
- def format
121
+ def format(key = nil)
99
122
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
100
- Format.new(self)
123
+ if key
124
+ Format.new(self).get(key)
125
+ else
126
+ Format.new(self)
127
+ end
101
128
  end
102
129
 
103
130
  def to_s
data/lib/hts/bcf.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  require_relative "hts"
@@ -15,7 +12,7 @@ module HTS
15
12
  class Bcf < Hts
16
13
  include Enumerable
17
14
 
18
- attr_reader :file_name, :index_path, :mode, :header
15
+ attr_reader :file_name, :index_name, :mode, :header
19
16
 
20
17
  def self.open(*args, **kw)
21
18
  file = new(*args, **kw) # do not yield
@@ -38,9 +35,10 @@ module HTS
38
35
 
39
36
  # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
40
37
 
41
- @file_name = file_name
42
- @mode = mode
43
- @hts_file = LibHTS.hts_open(@file_name, mode)
38
+ @file_name = file_name
39
+ @index_name = index
40
+ @mode = mode
41
+ @hts_file = LibHTS.hts_open(@file_name, mode)
44
42
 
45
43
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
46
44
 
@@ -52,28 +50,51 @@ module HTS
52
50
  return if @mode[0] == "w"
53
51
 
54
52
  @header = Bcf::Header.new(@hts_file)
53
+
54
+ create_index(index) if create_index
55
+
56
+ @idx = load_index(index)
57
+
58
+ @start_position = tell
59
+ end
60
+
61
# Build an index for the file this object was opened on.
#
# A progress message is printed with +warn+ before the index is built.
#
# @param index_name [String, nil] where to write the index; when omitted,
#   +bcf_index_build+ is called without an explicit index path
# @return the status value returned by the underlying LibHTS call
def create_index(index_name = nil)
  # NOTE(review): htslib's C bcf_index_build/bcf_index_build2 take a file
  # name as their first argument — confirm the binding really expects
  # @hts_file here.
  if index_name
    warn "Create index for #{@file_name} to #{index_name}"
    LibHTS.bcf_index_build2(@hts_file, index_name, -1)
  else
    warn "Create index for #{@file_name}"
    LibHTS.bcf_index_build(@hts_file, -1)
  end
end
69
+
70
# Load the index that belongs to the opened file.
#
# @param index_name [String, nil] explicit index location; when omitted,
#   +bcf_index_load3+ is called with a nil index name and flags 2
#   (presumably HTS_IDX_SILENT_FAIL — confirm against hts.h)
# @return the value returned by the underlying LibHTS call
def load_index(index_name = nil)
  return LibHTS.bcf_index_load2(@file_name, index_name) if index_name

  LibHTS.bcf_index_load3(@file_name, nil, 2)
end
77
+
78
# Whether an index is currently loaded.
#
# @idx is only assigned on the read path of the constructor, so it can
# still be nil here (e.g. for files opened for writing); that counts as
# "not loaded" instead of raising NoMethodError.
#
# @return [Boolean] false when @idx is nil or a NULL pointer
def index_loaded?
  !(@idx.nil? || @idx.null?)
end
56
81
 
57
82
# Write the header to the output file.
#
# The current header is duplicated and kept in @header, and its struct is
# then passed to +bcf_hdr_write+.
#
# @return the value returned by +bcf_hdr_write+
# @raise [IOError] if the file has already been closed
def write_header
  raise IOError, "closed stream" if closed?

  @header = header.dup
  # hts_set_fai_filename operates on the htsFile handle, not on the header.
  LibHTS.hts_set_fai_filename(@hts_file, @file_name)
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
end
  end
62
89
 
63
90
# Write one record to the output file.
#
# The record is duplicated first and the copy is handed to +bcf_write+.
#
# @param var [#dup] record to write
# @return [true] when the record was written
# @raise [IOError] if the file has already been closed
# @raise [RuntimeError] if +bcf_write+ reports a failure
def write(var)
  raise IOError, "closed stream" if closed?

  var_dup = var.dup
  # bcf_write returns 0 on success and a negative value on error.
  status = LibHTS.bcf_write(@hts_file, header, var_dup)
  raise "Failed to write record to #{@file_name}" if status < 0

  true
end
67
96
 
68
97
  # Close the current file.
69
- def close
70
- LibHTS.hts_close(@hts_file)
71
- @hts_file = nil
72
- end
73
-
74
- def closed?
75
- @hts_file.nil?
76
- end
77
98
 
78
99
  def nsamples
79
100
  header.nsamples
@@ -87,6 +108,8 @@ module HTS
87
108
  # Generate a new Record object each time.
88
109
  # Slower than each.
89
110
  def each_copy
111
+ raise IOError, "closed stream" if closed?
112
+
90
113
  return to_enum(__method__) unless block_given?
91
114
 
92
115
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
@@ -100,6 +123,8 @@ module HTS
100
123
  # Record object is reused.
101
124
  # Faster than each_copy.
102
125
  def each
126
+ raise IOError, "closed stream" if closed?
127
+
103
128
  return to_enum(__method__) unless block_given?
104
129
 
105
130
  bcf1 = LibHTS.bcf_init
data/lib/hts/faidx.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  module HTS
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module FFI
  # Adds Pointer#read_array_of_struct, but only when the class does not
  # already define a method of that name.
  class Pointer
    unless method_defined?(:read_array_of_struct)
      # Wrap +length+ consecutive structs, starting at this pointer.
      #
      # @param type [Class] struct class; must respond to +size+ and accept
      #   a pointer in +new+
      # @param length [Integer] number of structs to wrap
      # @return [Array] one +type+ instance per element, in memory order
      def read_array_of_struct(type, length)
        stride = type.size
        cursor = self
        length.times.map do |index|
          element = type.new(cursor)
          # Never step past the last element, so no out-of-bounds pointer
          # is created.
          cursor += stride unless index == length - 1
          element
        end
      end
    end
  end
end
data/lib/hts/hts.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "../htslib"
2
4
 
3
5
  module HTS
@@ -24,7 +26,18 @@ module HTS
24
26
  "#{major}.#{minor}"
25
27
  end
26
28
  end
27
-
29
+
30
# Close the underlying file handle.
#
# Does nothing when the file is already closed; otherwise the handle is
# passed to +hts_close+ and then dropped.
#
# @return [nil]
def close
  unless closed?
    LibHTS.hts_close(@hts_file)
    @hts_file = nil
  end
end
36
+
37
# Whether the file handle is gone.
#
# @return [Boolean] true when @hts_file is nil or reports itself as a
#   NULL pointer
def closed?
  return true if @hts_file.nil?

  @hts_file.null?
end
40
+
28
41
  def seek(offset)
29
42
  if @hts_file[:is_cram] == 1
30
43
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -48,9 +61,14 @@ module HTS
48
61
  end
49
62
 
50
63
  def rewind
51
- r = seek(@start_position) if @start_position
52
- raise "Failed to rewind: #{r}" if r < 0
53
- r
64
+ if @start_position
65
+ r = seek(@start_position)
66
+ raise "Failed to rewind: #{r}" if r < 0
67
+
68
+ tell
69
+ else
70
+ raise "Cannot rewind: no start position"
71
+ end
54
72
  end
55
73
  end
56
74
  end
@@ -36,7 +36,7 @@ module HTS
36
36
  :offset, :size_t,
37
37
  :_flags, :uint,
38
38
  :has_errno, :int
39
-
39
+
40
40
  bit_fields :_flags,
41
41
  :at_eof, 1,
42
42
  :mobile, 1,
@@ -479,8 +479,8 @@ module HTS
479
479
  :id, :string,
480
480
  :als, :pointer, # (\\0-separated string)
481
481
  :allele, :pointer,
482
- :info, :pointer, # BcfInfo.ptr,
483
- :fmt, BcfFmt.ptr,
482
+ :info, :pointer, # array of BcfInfo.ptr,
483
+ :fmt, :pointer, # array of BcfFmt.ptr,
484
484
  :var, BcfVariant.ptr,
485
485
  :n_var, :int,
486
486
  :var_type, :int,