htslib 0.0.10 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/hts/bcf/info.rb CHANGED
@@ -28,6 +28,10 @@ module HTS
28
28
  get(key, :flag)
29
29
  end
30
30
 
31
+ def [](key)
32
+ get(key)
33
+ end
34
+
31
35
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
32
36
  def get(key, type = nil)
33
37
  n = FFI::MemoryPointer.new(:int)
@@ -35,14 +39,14 @@ module HTS
35
39
  h = @record.header.struct
36
40
  r = @record.struct
37
41
 
38
- info_values = proc do |type|
39
- ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, type)
42
+ info_values = proc do |typ|
43
+ ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
40
44
  return nil if ret < 0 # return from method.
41
45
 
42
46
  p1.read_pointer
43
47
  end
44
48
 
45
- type ||= info_type_to_string(get_info_type(key))
49
+ type ||= ht_type_to_sym(get_info_type(key))
46
50
 
47
51
  case type&.to_sym
48
52
  when :int, :int32
@@ -67,47 +71,68 @@ module HTS
67
71
 
68
72
  # FIXME: naming? room for improvement.
69
73
  def fields
70
- n_info = @record.struct[:n_info]
71
- Array.new(n_info) do |i|
72
- fld = LibHTS::BcfInfo.new(
73
- @record.struct[:d][:info] +
74
- i * LibHTS::BcfInfo.size
75
- )
74
+ keys.map do |key|
75
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
76
+ num = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key)
77
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key)
76
78
  {
77
- name: LibHTS.bcf_hdr_int2id(
78
- @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
79
- ),
80
- n: LibHTS.bcf_hdr_id2number(
81
- @record.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
82
- ),
83
- vtype: fld[:type], i: fld[:key]
79
+ name: name,
80
+ n: num,
81
+ type: ht_type_to_sym(type),
82
+ key: key
84
83
  }
85
84
  end
86
85
  end
87
86
 
87
+ def length
88
+ @record.struct[:n_info]
89
+ end
90
+
91
+ def size
92
+ length
93
+ end
94
+
95
+ def to_h
96
+ ret = {}
97
+ keys.each do |key|
98
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
99
+ ret[name] = get(name)
100
+ end
101
+ ret
102
+ end
103
+
88
104
  private
89
105
 
106
+ def info_ptr
107
+ @record.struct[:d][:info].to_ptr
108
+ end
109
+
110
+ def keys
111
+ info_ptr.read_array_of_struct(LibHTS::BcfInfo, length).map do |info|
112
+ info[:key]
113
+ end
114
+ end
115
+
90
116
  def get_info_type(key)
91
117
  @record.struct[:n_info].times do |i|
92
- fld = LibHTS::BcfInfo.new(
93
- @record.struct[:d][:info] +
94
- i * LibHTS::BcfInfo.size
95
- )
96
- id = LibHTS.bcf_hdr_int2id(
97
- @record.header.struct, LibHTS::BCF_DT_ID, fld[:key]
98
- )
99
- return fld[:type] if id == key
118
+ info = LibHTS::BcfInfo.new(@record.struct[:d][:info] + i * LibHTS::BcfInfo.size)
119
+ k = info[:key]
120
+ id = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, k)
121
+ if id == key
122
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, k)
123
+ return type
124
+ end
100
125
  end
126
+ nil
101
127
  end
102
128
 
103
- def info_type_to_string(t)
129
+ def ht_type_to_sym(t)
104
130
  case t
105
- when 0 then :flag
106
- when 1, 2, 3, 4 then :int
107
- when 5 then :float
108
- when 7 then :string
109
- else
110
- raise "Unknown info type: #{t}"
131
+ when LibHTS::BCF_HT_FLAG then :flag
132
+ when LibHTS::BCF_HT_INT then :int
133
+ when LibHTS::BCF_HT_REAL then :float
134
+ when LibHTS::BCF_HT_STR then :string
135
+ when LibHTS::BCF_HT_LONG then :float
111
136
  end
112
137
  end
113
138
  end
@@ -18,35 +18,48 @@ module HTS
18
18
  @bcf1.to_ptr
19
19
  end
20
20
 
21
- # def inspect; end
22
-
23
- def formats; end
21
+ # Get the reference id of the record.
22
+ def rid
23
+ @bcf1[:rid]
24
+ end
24
25
 
25
- def genotypes; end
26
+ def rid=(rid)
27
+ @bcf1[:rid] = rid
28
+ end
26
29
 
30
+ # Get the chromosome of variant.
27
31
  def chrom
28
- rid = @bcf1[:rid]
29
-
30
32
  LibHTS.bcf_hdr_id2name(@header.struct, rid)
31
33
  end
32
34
 
35
+ # Return 0-based position.
33
36
  def pos
34
- @bcf1[:pos] + 1 # FIXME
37
+ @bcf1[:pos]
35
38
  end
36
39
 
37
- def start
38
- @bcf1[:pos]
40
+ def pos=(pos)
41
+ @bcf1[:pos] = pos
39
42
  end
40
43
 
41
- def stop
42
- @bcf1[:pos] + @bcf1[:rlen]
44
+ # Return the 0-based, exclusive end position
45
+ def endpos
46
+ pos + @bcf1[:rlen]
43
47
  end
44
48
 
49
+ # Return the value of the ID column.
45
50
  def id
46
51
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_INFO)
47
52
  @bcf1[:d][:id]
48
53
  end
49
54
 
55
+ def id=(id)
56
+ LibHTS.bcf_update_id(@header, @bcf1, id)
57
+ end
58
+
59
+ def clear_id
60
+ LibHTS.bcf_update_id(@header, @bcf1, ".")
61
+ end
62
+
50
63
  def filter
51
64
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FLT)
52
65
  d = @bcf1[:d]
@@ -58,7 +71,7 @@ module HTS
58
71
  when 1
59
72
  i = d[:flt].read_int
60
73
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
61
- when 2
74
+ when 2..nil
62
75
  d[:flt].get_array_of_int(0, n_flt).map do |i|
63
76
  LibHTS.bcf_hdr_int2id(@header.struct, LibHTS::BCF_DT_ID, i)
64
77
  end
@@ -67,10 +80,15 @@ module HTS
67
80
  end
68
81
  end
69
82
 
83
+ # Get variant quality.
70
84
  def qual
71
85
  @bcf1[:qual]
72
86
  end
73
87
 
88
+ def qual=(qual)
89
+ @bcf1[:qual] = qual
90
+ end
91
+
74
92
  def ref
75
93
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_STR)
76
94
  @bcf1[:d][:allele].get_pointer(0).read_string
@@ -90,14 +108,23 @@ module HTS
90
108
  ).map(&:read_string)
91
109
  end
92
110
 
93
- def info
111
+ def info(key = nil)
94
112
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_SHR)
95
- Info.new(self)
113
+ info = Info.new(self)
114
+ if key
115
+ info.get(key)
116
+ else
117
+ info
118
+ end
96
119
  end
97
120
 
98
- def format
121
+ def format(key = nil)
99
122
  LibHTS.bcf_unpack(@bcf1, LibHTS::BCF_UN_FMT)
100
- Format.new(self)
123
+ if key
124
+ Format.new(self).get(key)
125
+ else
126
+ Format.new(self)
127
+ end
101
128
  end
102
129
 
103
130
  def to_s
data/lib/hts/bcf.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  require_relative "hts"
@@ -15,7 +12,7 @@ module HTS
15
12
  class Bcf < Hts
16
13
  include Enumerable
17
14
 
18
- attr_reader :file_name, :index_path, :mode, :header
15
+ attr_reader :file_name, :index_name, :mode, :header
19
16
 
20
17
  def self.open(*args, **kw)
21
18
  file = new(*args, **kw) # do not yield
@@ -38,9 +35,10 @@ module HTS
38
35
 
39
36
  # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
40
37
 
41
- @file_name = file_name
42
- @mode = mode
43
- @hts_file = LibHTS.hts_open(@file_name, mode)
38
+ @file_name = file_name
39
+ @index_name = index
40
+ @mode = mode
41
+ @hts_file = LibHTS.hts_open(@file_name, mode)
44
42
 
45
43
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
46
44
 
@@ -52,28 +50,51 @@ module HTS
52
50
  return if @mode[0] == "w"
53
51
 
54
52
  @header = Bcf::Header.new(@hts_file)
53
+
54
+ create_index(index) if create_index
55
+
56
+ @idx = load_index(index)
57
+
58
+ @start_position = tell
59
+ end
60
+
61
+ def create_index(index_name = nil)
62
+ warn "Create index for #{@file_name} to #{index_name}"
63
+ if index
64
+ LibHTS.bcf_index_build2(@hts_file, index_name, -1)
65
+ else
66
+ LibHTS.bcf_index_build(@hts_file, -1)
67
+ end
68
+ end
69
+
70
+ def load_index(index_name = nil)
71
+ if index_name
72
+ LibHTS.bcf_index_load2(@file_name, index_name)
73
+ else
74
+ LibHTS.bcf_index_load3(@file_name, nil, 2)
75
+ end
76
+ end
77
+
78
+ def index_loaded?
79
+ !@idx.null?
55
80
  end
56
81
 
57
82
  def write_header
83
+ raise IOError, "closed stream" if closed?
84
+
58
85
  @header = header.dup
59
86
  LibHTS.hts_set_fai_filename(header, @file_name)
60
87
  LibHTS.bcf_hdr_write(@hts_file, header.struct)
61
88
  end
62
89
 
63
90
  def write(var)
91
+ raise IOError, "closed stream" if closed?
92
+
64
93
  var_dup = var.dup = var.dup
65
94
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
66
95
  end
67
96
 
68
97
  # Close the current file.
69
- def close
70
- LibHTS.hts_close(@hts_file)
71
- @hts_file = nil
72
- end
73
-
74
- def closed?
75
- @hts_file.nil?
76
- end
77
98
 
78
99
  def nsamples
79
100
  header.nsamples
@@ -87,6 +108,8 @@ module HTS
87
108
  # Generate a new Record object each time.
88
109
  # Slower than each.
89
110
  def each_copy
111
+ raise IOError, "closed stream" if closed?
112
+
90
113
  return to_enum(__method__) unless block_given?
91
114
 
92
115
  while LibHTS.bcf_read(@hts_file, header, bcf1 = LibHTS.bcf_init) != -1
@@ -100,6 +123,8 @@ module HTS
100
123
  # Record object is reused.
101
124
  # Faster than each_copy.
102
125
  def each
126
+ raise IOError, "closed stream" if closed?
127
+
103
128
  return to_enum(__method__) unless block_given?
104
129
 
105
130
  bcf1 = LibHTS.bcf_init
data/lib/hts/faidx.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
5
-
6
3
  require_relative "../htslib"
7
4
 
8
5
  module HTS
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FFI
4
+ class Pointer
5
+ unless method_defined?(:read_array_of_struct)
6
+ def read_array_of_struct(type, length)
7
+ ary = []
8
+ size = type.size
9
+ tmp = self
10
+ length.times do |j|
11
+ ary << type.new(tmp)
12
+ tmp += size unless j == length - 1 # avoid OOB
13
+ end
14
+ ary
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/hts/hts.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "../htslib"
2
4
 
3
5
  module HTS
@@ -24,7 +26,18 @@ module HTS
24
26
  "#{major}.#{minor}"
25
27
  end
26
28
  end
27
-
29
+
30
+ def close
31
+ return if closed?
32
+
33
+ LibHTS.hts_close(@hts_file)
34
+ @hts_file = nil
35
+ end
36
+
37
+ def closed?
38
+ @hts_file.nil? || @hts_file.null?
39
+ end
40
+
28
41
  def seek(offset)
29
42
  if @hts_file[:is_cram] == 1
30
43
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -48,9 +61,14 @@ module HTS
48
61
  end
49
62
 
50
63
  def rewind
51
- r = seek(@start_position) if @start_position
52
- raise "Failed to rewind: #{r}" if r < 0
53
- r
64
+ if @start_position
65
+ r = seek(@start_position)
66
+ raise "Failed to rewind: #{r}" if r < 0
67
+
68
+ tell
69
+ else
70
+ raise "Cannot rewind: no start position"
71
+ end
54
72
  end
55
73
  end
56
74
  end
@@ -36,7 +36,7 @@ module HTS
36
36
  :offset, :size_t,
37
37
  :_flags, :uint,
38
38
  :has_errno, :int
39
-
39
+
40
40
  bit_fields :_flags,
41
41
  :at_eof, 1,
42
42
  :mobile, 1,
@@ -479,8 +479,8 @@ module HTS
479
479
  :id, :string,
480
480
  :als, :pointer, # (\\0-separated string)
481
481
  :allele, :pointer,
482
- :info, :pointer, # BcfInfo.ptr,
483
- :fmt, BcfFmt.ptr,
482
+ :info, :pointer, # array of BcfInfo.ptr,
483
+ :fmt, :pointer, # array of BcfFmt.ptr,
484
484
  :var, BcfVariant.ptr,
485
485
  :n_var, :int,
486
486
  :var_type, :int,