htslib 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/hts/bcf.rb CHANGED
@@ -9,6 +9,7 @@ require_relative "bcf/format"
9
9
  require_relative "bcf/record"
10
10
 
11
11
  module HTS
12
+ # A class for working with VCF, BCF files.
12
13
  class Bcf < Hts
13
14
  include Enumerable
14
15
 
@@ -26,7 +27,7 @@ module HTS
26
27
  file
27
28
  end
28
29
 
29
- def initialize(file_name, mode = "r", index: nil, fai: nil, threads: nil,
30
+ def initialize(file_name, mode = "r", index: nil, threads: nil,
30
31
  create_index: false)
31
32
  if block_given?
32
33
  message = "HTS::Bcf.new() dose not take block; Please use HTS::Bcf.open() instead"
@@ -42,32 +43,31 @@ module HTS
42
43
 
43
44
  raise Errno::ENOENT, "Failed to open #{@file_name}" if @hts_file.null?
44
45
 
45
- if threads&.> 0
46
- r = LibHTS.hts_set_threads(@hts_file, threads)
47
- raise "Failed to set number of threads: #{threads}" if r < 0
48
- end
46
+ set_threads(threads) if threads
49
47
 
50
48
  return if @mode[0] == "w"
51
49
 
52
50
  @header = Bcf::Header.new(@hts_file)
53
-
54
51
  create_index(index) if create_index
55
-
56
52
  @idx = load_index(index)
57
-
58
53
  @start_position = tell
54
+ super # do nothing
59
55
  end
60
56
 
61
57
  def create_index(index_name = nil)
58
+ check_closed
59
+
62
60
  warn "Create index for #{@file_name} to #{index_name}"
63
- if index
64
- LibHTS.bcf_index_build2(@hts_file, index_name, -1)
61
+ if index_name
62
+ LibHTS.bcf_index_build2(@file_name, index_name, -1)
65
63
  else
66
- LibHTS.bcf_index_build(@hts_file, -1)
64
+ LibHTS.bcf_index_build(@file_name, -1)
67
65
  end
68
66
  end
69
67
 
70
68
  def load_index(index_name = nil)
69
+ check_closed
70
+
71
71
  if index_name
72
72
  LibHTS.bcf_index_load2(@file_name, index_name)
73
73
  else
@@ -76,39 +76,50 @@ module HTS
76
76
  end
77
77
 
78
78
  def index_loaded?
79
+ check_closed
80
+
79
81
  !@idx.null?
80
82
  end
81
83
 
82
84
  def write_header
83
- raise IOError, "closed stream" if closed?
85
+ check_closed
84
86
 
85
87
  @header = header.dup
86
88
  LibHTS.hts_set_fai_filename(header, @file_name)
87
- LibHTS.bcf_hdr_write(@hts_file, header.struct)
89
+ LibHTS.bcf_hdr_write(@hts_file, header)
88
90
  end
89
91
 
90
92
  def write(var)
91
- raise IOError, "closed stream" if closed?
93
+ check_closed
92
94
 
93
- var_dup = var.dup = var.dup
95
+ var_dup = var.dup
94
96
  LibHTS.bcf_write(@hts_file, header, var_dup) > 0 || raise
95
97
  end
96
98
 
97
99
  # Close the current file.
98
100
 
99
101
  def nsamples
102
+ check_closed
103
+
100
104
  header.nsamples
101
105
  end
102
106
 
103
107
  def samples
108
+ check_closed
109
+
104
110
  header.samples
105
111
  end
106
112
 
107
- # Iterate over each record.
108
- # Generate a new Record object each time.
109
- # Slower than each.
110
- def each_copy
111
- raise IOError, "closed stream" if closed?
113
# Iterate over the records of the file.
#
# @param copy [Boolean] when truthy, delegate to each_record_copy;
#   otherwise delegate to each_record_reuse
# @return whatever the selected iterator returns
def each(copy: false, &block)
  return each_record_copy(&block) if copy

  each_record_reuse(&block)
end
120
+
121
+ private def each_record_copy
122
+ check_closed
112
123
 
113
124
  return to_enum(__method__) unless block_given?
114
125
 
@@ -119,12 +130,10 @@ module HTS
119
130
  self
120
131
  end
121
132
 
122
- # Iterate over each record.
123
- # Record object is reused.
124
- # Faster than each_copy.
125
- def each
126
- raise IOError, "closed stream" if closed?
127
-
133
+ private def each_record_reuse
134
+ check_closed
135
+ # Each does not always start at the beginning of the file.
136
+ # This is the common behavior of IO objects in Ruby.
128
137
  return to_enum(__method__) unless block_given?
129
138
 
130
139
  bcf1 = LibHTS.bcf_init
@@ -132,5 +141,76 @@ module HTS
132
141
  yield record while LibHTS.bcf_read(@hts_file, header, bcf1) != -1
133
142
  self
134
143
  end
144
+
145
+ # @!macro [attach] define_getter
146
+ # @method $1
147
+ # Get $1 array
148
+ # @return [Array] the $1 array
149
+ define_getter :chrom
150
+ define_getter :pos
151
+ define_getter :endpos
152
+ define_getter :id
153
+ define_getter :ref
154
+ define_getter :alt
155
+ define_getter :qual
156
+ define_getter :filter
157
+
158
# Collect record.info(key) for every record yielded by #map, then seek
# back to the position the file had before the call.
#
# @param key the INFO key forwarded to each record's #info
# @return [Array] one entry per record
# @raise [NotImplementedError] when no key is given (collecting every
#   INFO field at once is not implemented yet)
def info(key = nil)
  check_closed
  saved_offset = tell
  raise NotImplementedError unless key

  values = map { |record| record.info(key) }
  seek(saved_offset)
  values
end
171
+
172
# Collect record.format(key) for every record yielded by #map, then seek
# back to the position the file had before the call.
#
# @param key the FORMAT key forwarded to each record's #format
# @return [Array] one entry per record
# @raise [NotImplementedError] when no key is given (collecting every
#   FORMAT field at once is not implemented yet)
def format(key = nil)
  check_closed
  saved_offset = tell
  raise NotImplementedError unless key

  values = map { |record| record.format(key) }
  seek(saved_offset)
  values
end
185
+
186
+ # @!macro [attach] define_iterator
187
+ # @method each_$1
188
+ # Get $1 iterator
189
+ define_iterator :chrom
190
+ define_iterator :pos
191
+ define_iterator :endpos
192
+ define_iterator :id
193
+ define_iterator :ref
194
+ define_iterator :alt
195
+ define_iterator :qual
196
+ define_iterator :filter
197
+
198
# Yield record.info(key) for each record produced by #each.
#
# @param key the INFO key forwarded to each record's #info
# @yield the value returned by record.info(key)
# @return [Enumerator] when no block is given
# @return [self] when a block is given
def each_info(key)
  check_closed
  # A plain `def` has no local named `block`, so test with block_given?.
  # `key` must be forwarded so the enumerator can re-invoke this method.
  return to_enum(__method__, key) unless block_given?

  each do |record|
    yield record.info(key)
  end
  self
end
206
+
207
# Yield record.format(key) for each record produced by #each.
#
# @param key the FORMAT key forwarded to each record's #format
# @yield the value returned by record.format(key)
# @return [Enumerator] when no block is given
# @return [self] when a block is given
def each_format(key)
  check_closed
  # A plain `def` has no local named `block`, so test with block_given?.
  # `key` must be forwarded so the enumerator can re-invoke this method.
  return to_enum(__method__, key) unless block_given?

  each do |record|
    yield record.format(key)
  end
  self
end
135
215
  end
136
216
  end
data/lib/hts/faidx.rb CHANGED
@@ -6,22 +6,32 @@ module HTS
6
6
  class Faidx
7
7
  attr_reader :file_name
8
8
 
9
- class << self
10
- alias open new
9
# Create an instance and, when a block is given, yield it and close it
# once the block finishes (even if the block raises).
#
# The block is deliberately not forwarded to .new.
#
# @return the created instance (already closed when a block was given)
def self.open(*args, **kw)
  handle = new(*args, **kw)
  if block_given?
    begin
      yield handle
    ensure
      handle.close
    end
  end
  handle
end
12
20
 
13
21
  def initialize(file_name)
22
+ if block_given?
23
+ message = "HTS::Faidx.new() dose not take block; Please use HTS::Faidx.open() instead"
24
+ raise message
25
+ end
26
+
14
27
  @file_name = file_name
15
28
  @fai = LibHTS.fai_load(@file_name)
16
29
 
17
- # IO like API
18
- if block_given?
19
- begin
20
- yield self
21
- ensure
22
- close
23
- end
24
- end
30
+ raise Errno::ENOENT, "Failed to open #{@file_name}" if @fai.null?
31
+ end
32
+
33
+ def struct
34
+ @fai
25
35
  end
26
36
 
27
37
  def close
@@ -29,10 +39,10 @@ module HTS
29
39
  end
30
40
 
31
41
  # the number of sequences in the index.
32
- def size
42
+ def length
33
43
  LibHTS.faidx_nseq(@fai)
34
44
  end
35
- alias length size
45
+ alias size length
36
46
 
37
47
  # return the length of the requested chromosome.
38
48
  def chrom_size(chrom)
@@ -48,10 +58,10 @@ module HTS
48
58
  alias chrom_length chrom_size
49
59
 
50
60
  # FIXME: naming and syntax
51
- def cget; end
61
+ # def cget; end
52
62
 
53
63
  # FIXME: naming and syntax
54
- def get; end
64
+ # def get; end
55
65
 
56
66
  # __iter__
57
67
  end
data/lib/hts/hts.rb CHANGED
@@ -3,7 +3,38 @@
3
3
  require_relative "../htslib"
4
4
 
5
5
  module HTS
6
+ # A base class for hts files.
6
7
  class Hts
8
class << self
  private

  # Define an instance method +name+ that maps +name+ over every record
  # and then seeks back to the position the file had before the call.
  def define_getter(name)
    define_method(name) do
      check_closed
      saved_offset = tell
      # tap runs the seek after the map has finished and keeps the array
      # as the return value.
      map(&name).tap { seek(saved_offset) }
    end
  end

  # Define an instance method each_<name> that yields record.<name> for
  # every record; it returns an Enumerator when called without a block.
  def define_iterator(name)
    define_method("each_#{name}") do |&block|
      check_closed
      return to_enum(__method__) unless block

      each { |record| block.call(record.public_send(name)) }
      self
    end
  end
end
33
+
34
# Intentionally empty: accepts and ignores any positional arguments.
def initialize(*_args); end
37
+
7
38
  def struct
8
39
  @hts_file
9
40
  end
@@ -12,11 +43,11 @@ module HTS
12
43
  @hts_file.to_ptr
13
44
  end
14
45
 
15
- def format
46
# Name of the file format reported by LibHTS.hts_get_format.
#
# @return [String] the :format member converted with #to_s
def file_format
  detected = LibHTS.hts_get_format(@hts_file)
  detected[:format].to_s
end
18
49
 
19
- def format_version
50
+ def file_format_version
20
51
  v = LibHTS.hts_get_format(@hts_file)[:version]
21
52
  major = v[:major]
22
53
  minor = v[:minor]
@@ -38,6 +69,16 @@ module HTS
38
69
  @hts_file.nil? || @hts_file.null?
39
70
  end
40
71
 
72
# Forward a thread count to LibHTS.hts_set_threads for this file handle.
#
# The C call is only made when +n+ is positive; zero and negative values
# leave the handle untouched.
#
# @param n [Integer] requested number of threads
# @return [self]
# @raise [TypeError] if +n+ is not an Integer
# @raise [RuntimeError] if LibHTS.hts_set_threads returns a negative value
def set_threads(n)
  # The predicate is `is_a?`; `is_a` does not exist and raised NoMethodError.
  raise TypeError, "expected Integer, got #{n.class}" unless n.is_a?(Integer)

  if n.positive?
    status = LibHTS.hts_set_threads(@hts_file, n)
    # Report the parameter `n`; no local named `threads` exists in this method.
    raise "Failed to set number of threads: #{n}" if status < 0
  end
  self
end
81
+
41
82
  def seek(offset)
42
83
  if @hts_file[:is_cram] == 1
43
84
  LibHTS.cram_seek(@hts_file[:fp][:cram], offset, IO::SEEK_SET)
@@ -70,5 +111,11 @@ module HTS
70
111
  raise "Cannot rewind: no start position"
71
112
  end
72
113
  end
114
+
115
+ private
116
+
117
# Guard used by public methods before touching the file handle.
#
# @return [nil] when the handle is open
# @raise [IOError] "closed stream" when #closed? is true
def check_closed
  return unless closed?

  raise IOError, "closed stream"
end
73
120
  end
74
121
  end
@@ -18,19 +18,19 @@ module HTS
18
18
  # Open an existing hFILE stream for reading or writing.
19
19
  attach_function \
20
20
  :bgzf_hopen,
21
- [HFILE, :string],
21
+ [HFile, :string],
22
22
  BGZF.by_ref
23
23
 
24
24
  # Close the BGZF and free all associated resources.
25
25
  attach_function \
26
26
  :bgzf_close,
27
- [HFILE],
27
+ [HFile],
28
28
  :int
29
29
 
30
30
  # Read up to _length_ bytes from the file storing into _data_.
31
31
  attach_function \
32
32
  :bgzf_read,
33
- [HFILE, :pointer, :size_t],
33
+ [HFile, :pointer, :size_t],
34
34
  :ssize_t
35
35
 
36
36
  # Write _length_ bytes from _data_ to the file. If no I/O errors occur,
@@ -181,7 +181,7 @@ module HTS
181
181
  # Load BGZF index from an hFILE
182
182
  attach_function \
183
183
  :bgzf_index_load_hfile,
184
- [BGZF, HFILE, :string],
184
+ [BGZF, HFile, :string],
185
185
  :int
186
186
 
187
187
  # Save BGZF index
@@ -193,7 +193,7 @@ module HTS
193
193
  # Write a BGZF index to an hFILE
194
194
  attach_function \
195
195
  :bgzf_index_dump_hfile,
196
- [BGZF, HFILE, :string],
196
+ [BGZF, HFile, :string],
197
197
  :int
198
198
  end
199
199
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
+ # Module for working with C HTSlib.
4
5
  module LibHTS
5
6
  typedef :int64, :hts_pos_t
6
7
  typedef :pointer, :bam_plp_auto_f
@@ -24,9 +25,9 @@ module HTS
24
25
  :f, :pointer # kstream_t
25
26
  end
26
27
 
27
- # HFILE
28
+ # HFile
28
29
 
29
- class HFILE < FFI::BitStruct
30
+ class HFile < FFI::BitStruct
30
31
  layout \
31
32
  :buffer, :string,
32
33
  :begin, :string,
@@ -56,7 +57,7 @@ module HTS
56
57
  :uncompressed_block, :pointer,
57
58
  :compressed_block, :pointer,
58
59
  :cache, :pointer,
59
- :fp, HFILE.ptr,
60
+ :fp, HFile.ptr,
60
61
  :mt, :pointer,
61
62
  :idx, :pointer,
62
63
  :idx_build_otf, :int,
@@ -189,6 +190,16 @@ module HTS
189
190
  )
190
191
  end
191
192
 
193
# FFI struct with the fields reg, intervals, tid, count, min_beg and max_end.
# NOTE(review): presumably mirrors htslib's hts_reglist_t — confirm against hts.h.
class HtsReglist < FFI::Struct
  layout(
    :reg, :string,
    :intervals, :pointer, # hts_pair_pos_t
    :tid, :int,
    :count, :uint32_t,
    :min_beg, :hts_pos_t,
    :max_end, :hts_pos_t
  )
end
202
+
192
203
  # HtsFile
193
204
  class SamHdr < FFI::Struct
194
205
  layout \
@@ -217,7 +228,7 @@ module HTS
217
228
  union_layout(
218
229
  :bgzf, BGZF.ptr,
219
230
  :cram, :pointer, # cram_fd
220
- :hfile, HFILE.ptr
231
+ :hfile, HFile.ptr
221
232
  ),
222
233
  :state, :pointer,
223
234
  :format, HtsFormat,
@@ -263,7 +274,7 @@ module HTS
263
274
  :n_reg, :int,
264
275
  :beg, :int64,
265
276
  :end, :int64,
266
- :reg_list, :pointer,
277
+ :reg_list, :pointer, # HtsReglist.ptr,
267
278
  :curr_tid, :int,
268
279
  :curr_reg, :int,
269
280
  :curr_intv, :int,
@@ -516,5 +527,15 @@ module HTS
516
527
  LibHTS.bcf_destroy(ptr) unless ptr.null?
517
528
  end
518
529
  end
530
+
531
# Enum of CRAM content types, given as alternating symbol/value entries.
# NOTE(review): presumably mirrors htslib's cram_content_type — confirm.
CramContentType = enum(
  [
    :ct_error, -1,
    :file_header, 0,
    :compression_header, 1,
    :mapped_slice, 2,
    :unmapped_slice, 3, # cram v1.0 only
    :external, 4,
    :core, 5
  ]
)
519
540
  end
520
541
  end