htslib 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bam.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Based on hts-python
4
- # https://github.com/quinlan-lab/hts-python
3
+ require_relative "../htslib"
5
4
 
5
+ require_relative "hts"
6
6
  require_relative "bam/header"
7
7
  require_relative "bam/cigar"
8
8
  require_relative "bam/flag"
@@ -12,99 +12,143 @@ module HTS
12
12
  class Bam
13
13
  include Enumerable
14
14
 
15
- attr_reader :file_path, :mode, :header
16
- # HtfFile is FFI::BitStruct
17
- attr_reader :htf_file
15
+ attr_reader :file_name, :index_name, :mode, :header
18
16
 
19
- class << self
20
- alias open new
21
- end
17
+ def self.open(*args, **kw)
18
+ file = new(*args, **kw) # do not yield
19
+ return file unless block_given?
22
20
 
23
- def initialize(file_path, mode = "r", create_index: nil)
24
- file_path = File.expand_path(file_path)
21
+ begin
22
+ yield file
23
+ ensure
24
+ file.close
25
+ end
26
+ file
27
+ end
25
28
 
26
- unless File.exist?(file_path)
27
- message = "No such SAM/BAM file - #{file_path}"
29
+ def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
+ create_index: false)
31
+ if block_given?
32
+ message = "HTS::Bam.new() dose not take block; Please use HTS::Bam.open() instead"
28
33
  raise message
29
34
  end
30
35
 
31
- @file_path = file_path
32
- @mode = mode
33
- @htf_file = LibHTS.hts_open(file_path, mode)
34
- @header = Bam::Header.new(LibHTS.sam_hdr_read(htf_file))
35
-
36
- # read
37
- if mode[0] == "r"
38
- # load index
39
- @idx = LibHTS.sam_index_load(htf_file, file_path)
40
- # create index
41
- if create_index || (@idx.null? && create_index.nil?)
42
- warn "Create index for #{file_path}"
43
- LibHTS.sam_index_build(file_path, -1)
44
- @idx = LibHTS.sam_index_load(htf_file, file_path)
45
- end
46
- else
47
- # FIXME: implement
48
- raise "not implemented yet."
36
+ # NOTE: Do not check for the existence of local files, since file_names may be remote URIs.
37
+
38
+ @file_name = file_name
39
+ @index_name = index
40
+ @mode = mode
41
+ @hts_file = LibHTS.hts_open(@file_name, mode)
42
+
43
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
44
+
45
+ if fai
46
+ r = LibHTS.hts_set_fai_filename(@hts_file, fai)
47
+ raise "Failed to load fasta index: #{fai}" if r < 0
49
48
  end
50
49
 
51
- # IO like API
52
- if block_given?
53
- begin
54
- yield self
55
- ensure
56
- close
57
- end
50
+ if threads&.> 0
51
+ r = LibHTS.hts_set_threads(@hts_file, threads)
52
+ raise "Failed to set number of threads: #{threads}" if r < 0
58
53
  end
59
- end
60
54
 
61
- def struct
62
- htf_file
55
+ return if @mode[0] == "w"
56
+
57
+ @header = Bam::Header.new(@hts_file)
58
+
59
+ create_index(index) if create_index
60
+
61
+ @idx = load_index(index)
62
+
63
+ @start_position = tell
63
64
  end
64
65
 
65
- def to_ptr
66
- htf_file.to_ptr
66
+ def create_index(index_name = nil)
67
+ warn "Create index for #{@file_name} to #{index_name}"
68
+ if index
69
+ LibHTS.sam_index_build2(@file_name, index_name, -1)
70
+ else
71
+ LibHTS.sam_index_build(@file_name, -1)
72
+ end
67
73
  end
68
74
 
69
- def write(alns)
70
- alns.each do
71
- LibHTS.sam_write1(htf_file, header, alns.b) > 0 || raise
75
+ def load_index(index_name = nil)
76
+ if index_name
77
+ LibHTS.sam_index_load2(@hts_file, @file_name, index_name)
78
+ else
79
+ LibHTS.sam_index_load3(@hts_file, @file_name, nil, 2) # should be 3 ? (copy remote file to local?)
72
80
  end
73
81
  end
74
82
 
83
+ def index_loaded?
84
+ !@idx.null?
85
+ end
86
+
75
87
  # Close the current file.
76
88
  def close
77
- LibHTS.hts_close(htf_file)
89
+ LibHTS.hts_idx_destroy(@idx) if @idx&.null?
90
+ @idx = nil
91
+ super
92
+ end
93
+
94
+ def write_header(header)
95
+ raise IOError, "closed stream" if closed?
96
+
97
+ @header = header.dup
98
+ LibHTS.hts_set_fai_filename(@hts_file, @file_name)
99
+ LibHTS.sam_hdr_write(@hts_file, header)
78
100
  end
79
101
 
80
- # Flush the current file.
81
- def flush
82
- # LibHTS.bgzf_flush(@htf_file.fp.bgzf)
102
+ def write(aln)
103
+ raise IOError, "closed stream" if closed?
104
+
105
+ aln_dup = aln.dup
106
+ LibHTS.sam_write1(@hts_file, header, aln_dup) > 0 || raise
83
107
  end
84
108
 
109
+ # Iterate over each record.
110
+ # Generate a new Record object each time.
111
+ # Slower than each.
112
+ def each_copy
113
+ raise IOError, "closed stream" if closed?
114
+ return to_enum(__method__) unless block_given?
115
+
116
+ while LibHTS.sam_read1(@hts_file, header, bam1 = LibHTS.bam_init1) != -1
117
+ record = Record.new(bam1, header)
118
+ yield record
119
+ end
120
+ self
121
+ end
122
+
123
+ # Iterate over each record.
124
+ # Record object is reused.
125
+ # Faster than each_copy.
85
126
  def each
127
+ raise IOError, "closed stream" if closed?
86
128
  # Each does not always start at the beginning of the file.
87
129
  # This is the common behavior of IO objects in Ruby.
88
130
  # This may change in the future.
89
131
  return to_enum(__method__) unless block_given?
90
132
 
91
- while LibHTS.sam_read1(htf_file, header, bam1 = LibHTS.bam_init1) > 0
92
- record = Record.new(bam1, header)
93
- yield record
94
- end
133
+ bam1 = LibHTS.bam_init1
134
+ record = Record.new(bam1, header)
135
+ yield record while LibHTS.sam_read1(@hts_file, header, bam1) != -1
95
136
  self
96
137
  end
97
138
 
98
139
  # query [WIP]
99
140
  def query(region)
141
+ raise IOError, "closed stream" if closed?
142
+ raise "Index file is required to call the query method." unless index_loaded?
143
+
100
144
  qiter = LibHTS.sam_itr_querys(@idx, header, region)
101
145
  begin
102
146
  bam1 = LibHTS.bam_init1
103
- slen = LibHTS.sam_itr_next(htf_file, qiter, bam1)
147
+ slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
104
148
  while slen > 0
105
149
  yield Record.new(bam1, header)
106
150
  bam1 = LibHTS.bam_init1
107
- slen = LibHTS.sam_itr_next(htf_file, qiter, bam1)
151
+ slen = LibHTS.sam_itr_next(@hts_file, qiter, bam1)
108
152
  end
109
153
  ensure
110
154
  LibHTS.hts_itr_destroy(qiter)
data/lib/hts/bcf/format.rb CHANGED
@@ -1,21 +1,41 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # https://github.com/brentp/hts-nim/blob/master/src/hts/vcf.nim
4
- # This is a port from Nim.
5
- # TODO: Make it more like Ruby.
6
-
7
3
  module HTS
8
- class Bcf
4
+ class Bcf < Hts
9
5
  class Format
10
6
  def initialize(record)
11
7
  @record = record
12
8
  @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
13
9
  end
14
10
 
11
+ # For compatibility with htslib.cr.
12
+ def get_int(key)
13
+ get(key, :int)
14
+ end
15
+
16
+ # For compatibility with htslib.cr.
17
+ def get_float(key)
18
+ get(key, :float)
19
+ end
20
+
21
+ # For compatibility with htslib.cr.
22
+ def get_flag(key)
23
+ get(key, :flag)
24
+ end
25
+
26
+ # For compatibility with htslib.cr.
27
+ def get_string(key)
28
+ get(key, :string)
29
+ end
30
+
31
+ def [](key)
32
+ get(key)
33
+ end
34
+
15
35
  def get(key, type = nil)
16
36
  n = FFI::MemoryPointer.new(:int)
17
37
  p1 = @p1
18
- h = @record.bcf.header.struct
38
+ h = @record.header.struct
19
39
  r = @record.struct
20
40
 
21
41
  format_values = proc do |type|
@@ -25,7 +45,15 @@ module HTS
25
45
  p1.read_pointer
26
46
  end
27
47
 
28
- case type.to_sym
48
+ # The GT FORMAT field is special in that it is marked as a string in the header,
49
+ # but it is actually encoded as an integer.
50
+ if key == "GT"
51
+ type = :int
52
+ elsif type.nil?
53
+ type = ht_type_to_sym(get_fmt_type(key))
54
+ end
55
+
56
+ case type&.to_sym
29
57
  when :int, :int32
30
58
  format_values.call(LibHTS::BCF_HT_INT)
31
59
  .read_array_of_int32(n.read_int)
@@ -33,21 +61,85 @@ module HTS
33
61
  format_values.call(LibHTS::BCF_HT_REAL)
34
62
  .read_array_of_float(n.read_int)
35
63
  when :flag
36
- format_values.call(LibHTS::BCF_HT_FLAG)
37
- .read_int == 1
64
+ raise NotImplementedError, "Flag type not implemented yet. " \
65
+ "Please file an issue on GitHub."
66
+ # format_values.call(LibHTS::BCF_HT_FLAG)
67
+ # .read_int == 1
38
68
  when :string, :str
39
- raise NotImplementedError, "String type not implemented yet."
40
- format_values.call(LibHTS::BCF_HT_STR)
41
- .read_string
69
+ raise NotImplementedError, "String type not implemented yet. " \
70
+ "Please file an issue on GitHub."
71
+ # format_values.call(LibHTS::BCF_HT_STR)
72
+ # .read_string
42
73
  end
43
74
  end
44
75
 
45
- def set; end
76
+ def fields
77
+ ids.map do |id|
78
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)
79
+ num = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_FMT, id)
80
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_FMT, id)
81
+ {
82
+ name: name,
83
+ n: num,
84
+ type: ht_type_to_sym(type),
85
+ id: id
86
+ }
87
+ end
88
+ end
46
89
 
47
- # def fields # iterator
48
- # end
90
+ def length
91
+ @record.struct[:n_fmt]
92
+ end
49
93
 
50
- def genotypes; end
94
+ def size
95
+ length
96
+ end
97
+
98
+ def to_h
99
+ ret = {}
100
+ ids.each do |id|
101
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)
102
+ ret[name] = get(name)
103
+ end
104
+ ret
105
+ end
106
+
107
+ # def genotypes; end
108
+
109
+ private
110
+
111
+ def fmt_ptr
112
+ @record.struct[:d][:fmt].to_ptr
113
+ end
114
+
115
+ def ids
116
+ fmt_ptr.read_array_of_struct(LibHTS::BcfFmt, length).map do |fmt|
117
+ fmt[:id]
118
+ end
119
+ end
120
+
121
+ def get_fmt_type(qname)
122
+ @record.struct[:n_fmt].times do |i|
123
+ fmt = LibHTS::BcfFmt.new(@record.struct[:d][:fmt] + i * LibHTS::BcfFmt.size)
124
+ id = fmt[:id]
125
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, id)
126
+ if name == qname
127
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_FMT, id)
128
+ return type
129
+ end
130
+ end
131
+ nil
132
+ end
133
+
134
+ def ht_type_to_sym(t)
135
+ case t
136
+ when LibHTS::BCF_HT_FLAG then :flag
137
+ when LibHTS::BCF_HT_INT then :int
138
+ when LibHTS::BCF_HT_REAL then :float
139
+ when LibHTS::BCF_HT_STR then :string
140
+ when LibHTS::BCF_HT_LONG then :float
141
+ end
142
+ end
51
143
  end
52
144
  end
53
145
  end
data/lib/hts/bcf/header.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Header
6
- def initialize(bcf_hdr)
7
- @bcf_hdr = bcf_hdr
6
+ def initialize(hts_file)
7
+ @bcf_hdr = LibHTS.bcf_hdr_read(hts_file)
8
8
  end
9
9
 
10
10
  def struct
@@ -15,12 +15,33 @@ module HTS
15
15
  @bcf_hdr.to_ptr
16
16
  end
17
17
 
18
+ def get_version
19
+ LibHTS.bcf_hdr_get_version(@bcf_hdr)
20
+ end
21
+
22
+ def nsamples
23
+ LibHTS.bcf_hdr_nsamples(@bcf_hdr)
24
+ end
25
+
26
+ def samples
27
+ # bcf_hdr_id2name is macro function
28
+ @bcf_hdr[:samples]
29
+ .read_array_of_pointer(nsamples)
30
+ .map(&:read_string)
31
+ end
32
+
18
33
  def to_s
19
34
  kstr = LibHTS::KString.new
20
35
  raise "Failed to get header string" unless LibHTS.bcf_hdr_format(@bcf_hdr, 0, kstr)
21
36
 
22
37
  kstr[:s]
23
38
  end
39
+
40
+ private
41
+
42
+ def initialize_copy(orig)
43
+ @bcf_hdr = LibHTS.bcf_hdr_dup(orig.struct)
44
+ end
24
45
  end
25
46
  end
26
47
  end
data/lib/hts/bcf/info.rb CHANGED
@@ -1,27 +1,52 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- class Bcf
4
+ class Bcf < Hts
5
5
  class Info
6
6
  def initialize(record)
7
7
  @record = record
8
+ @p1 = FFI::MemoryPointer.new(:pointer) # FIXME: naming
9
+ end
10
+
11
+ # For compatibility with htslib.cr.
12
+ def get_int(key)
13
+ get(key, :int)
14
+ end
15
+
16
+ # For compatibility with htslib.cr.
17
+ def get_float(key)
18
+ get(key, :float)
19
+ end
20
+
21
+ # For compatibility with htslib.cr.
22
+ def get_string(key)
23
+ get(key, :string)
24
+ end
25
+
26
+ # For compatibility with htslib.cr.
27
+ def get_flag(key)
28
+ get(key, :flag)
29
+ end
30
+
31
+ def [](key)
32
+ get(key)
8
33
  end
9
34
 
10
35
  # @note Specify the type. If you don't specify a type, it will still work, but it will be slower.
11
36
  def get(key, type = nil)
12
37
  n = FFI::MemoryPointer.new(:int)
13
- p1 = @record.p1
14
- h = @record.bcf.header.struct
38
+ p1 = @p1
39
+ h = @record.header.struct
15
40
  r = @record.struct
16
41
 
17
- info_values = proc do |type|
18
- ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, type)
42
+ info_values = proc do |typ|
43
+ ret = LibHTS.bcf_get_info_values(h, r, key, p1, n, typ)
19
44
  return nil if ret < 0 # return from method.
20
45
 
21
46
  p1.read_pointer
22
47
  end
23
48
 
24
- type ||= info_type_to_string(get_info_type(key))
49
+ type ||= ht_type_to_sym(get_info_type(key))
25
50
 
26
51
  case type&.to_sym
27
52
  when :int, :int32
@@ -46,47 +71,68 @@ module HTS
46
71
 
47
72
  # FIXME: naming? room for improvement.
48
73
  def fields
49
- n_info = @record.struct[:n_info]
50
- Array.new(n_info) do |i|
51
- fld = LibHTS::BcfInfo.new(
52
- @record.struct[:d][:info] +
53
- i * LibHTS::BcfInfo.size
54
- )
74
+ keys.map do |key|
75
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
76
+ num = LibHTS.bcf_hdr_id2number(@record.header.struct, LibHTS::BCF_HL_INFO, key)
77
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, key)
55
78
  {
56
- name: LibHTS.bcf_hdr_int2id(
57
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
58
- ),
59
- n: LibHTS.bcf_hdr_id2number(
60
- @record.bcf.header.struct, LibHTS::BCF_HL_INFO, fld[:key]
61
- ),
62
- vtype: fld[:type], i: fld[:key]
79
+ name: name,
80
+ n: num,
81
+ type: ht_type_to_sym(type),
82
+ key: key
63
83
  }
64
84
  end
65
85
  end
66
86
 
87
+ def length
88
+ @record.struct[:n_info]
89
+ end
90
+
91
+ def size
92
+ length
93
+ end
94
+
95
+ def to_h
96
+ ret = {}
97
+ keys.each do |key|
98
+ name = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, key)
99
+ ret[name] = get(name)
100
+ end
101
+ ret
102
+ end
103
+
67
104
  private
68
105
 
106
+ def info_ptr
107
+ @record.struct[:d][:info].to_ptr
108
+ end
109
+
110
+ def keys
111
+ info_ptr.read_array_of_struct(LibHTS::BcfInfo, length).map do |info|
112
+ info[:key]
113
+ end
114
+ end
115
+
69
116
  def get_info_type(key)
70
117
  @record.struct[:n_info].times do |i|
71
- fld = LibHTS::BcfInfo.new(
72
- @record.struct[:d][:info] +
73
- i * LibHTS::BcfInfo.size
74
- )
75
- id = LibHTS.bcf_hdr_int2id(
76
- @record.bcf.header.struct, LibHTS::BCF_DT_ID, fld[:key]
77
- )
78
- return fld[:type] if id == key
118
+ info = LibHTS::BcfInfo.new(@record.struct[:d][:info] + i * LibHTS::BcfInfo.size)
119
+ k = info[:key]
120
+ id = LibHTS.bcf_hdr_int2id(@record.header.struct, LibHTS::BCF_DT_ID, k)
121
+ if id == key
122
+ type = LibHTS.bcf_hdr_id2type(@record.header.struct, LibHTS::BCF_HL_INFO, k)
123
+ return type
124
+ end
79
125
  end
126
+ nil
80
127
  end
81
128
 
82
- def info_type_to_string(t)
129
+ def ht_type_to_sym(t)
83
130
  case t
84
- when 0 then :flag
85
- when 1, 2, 3, 4 then :int
86
- when 5 then :float
87
- when 7 then :string
88
- else
89
- raise "Unknown info type: #{t}"
131
+ when LibHTS::BCF_HT_FLAG then :flag
132
+ when LibHTS::BCF_HT_INT then :int
133
+ when LibHTS::BCF_HT_REAL then :float
134
+ when LibHTS::BCF_HT_STR then :string
135
+ when LibHTS::BCF_HT_LONG then :float
90
136
  end
91
137
  end
92
138
  end