htslib 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,27 @@ module HTS
12
12
  :l, :size_t,
13
13
  :m, :size_t,
14
14
  :s, :string
15
+
16
+ def buffer_ptr
17
+ to_ptr.get_pointer(self.class.offset_of(:s))
18
+ end
19
+
20
+ def read_string_copy
21
+ ptr = buffer_ptr
22
+ return "" if ptr.null?
23
+
24
+ ptr.read_string(self[:l])
25
+ end
26
+
27
+ def free_buffer
28
+ ptr = buffer_ptr
29
+ return if ptr.null?
30
+
31
+ LibHTS.hts_free(ptr)
32
+ to_ptr.put_pointer(self.class.offset_of(:s), FFI::Pointer::NULL)
33
+ self[:l] = 0
34
+ self[:m] = 0
35
+ end
15
36
  end
16
37
 
17
38
  class KSeq < FFI::Struct
@@ -430,6 +451,17 @@ module HTS
430
451
  :idx, HtsIdx.ptr,
431
452
  :dict, :pointer
432
453
 
454
+ def close
455
+ return if @closed
456
+
457
+ ptr = to_ptr
458
+ unless ptr.null?
459
+ ptr.autorelease = false if ptr.respond_to?(:autorelease=)
460
+ self.class.release(ptr)
461
+ end
462
+ @closed = true
463
+ end
464
+
433
465
  def self.release(ptr)
434
466
  LibHTS.tbx_destroy(ptr) unless ptr.null?
435
467
  end
@@ -466,7 +498,7 @@ module HTS
466
498
  :n, :int
467
499
  end
468
500
 
469
- # Complete textual representation of a header line
501
+ # Complete textual representation of a header line owned by Ruby.
470
502
  class BcfHrec < FFI::ManagedStruct
471
503
  layout \
472
504
  :type, :int,
@@ -503,7 +535,7 @@ module HTS
503
535
  class BcfIdinfo < FFI::Struct
504
536
  layout \
505
537
  :info, [:uint64, 3],
506
- :hrec, [BcfHrec.ptr, 3],
538
+ :hrec, [:pointer, 3],
507
539
  :id, :int
508
540
  end
509
541
 
@@ -330,11 +330,6 @@ module HTS
330
330
  %i[cram_fd cram_block cram_metrics int int],
331
331
  :int
332
332
 
333
- # attach_function \
334
- # :cram_compress_block2,
335
- # %i[cram_fd cram_slice cram_block cram_metrics int int],
336
- # :int
337
-
338
333
  # Creates a new container, specifying the maximum number of slices
339
334
  # and records permitted.
340
335
  attach_function \
@@ -48,24 +48,24 @@ module HTS
48
48
  attach_function \
49
49
  :fai_fetch,
50
50
  [Faidx, :string, :pointer],
51
- :string
51
+ :pointer
52
52
 
53
53
  # Fetch the sequence in a region
54
54
  attach_function \
55
55
  :fai_fetch64,
56
56
  [Faidx, :string, :pointer],
57
- :string
57
+ :pointer
58
58
 
59
59
  # Fetch the quality string for a region for FASTQ files
60
60
  attach_function \
61
61
  :fai_fetchqual,
62
62
  [Faidx, :string, :pointer],
63
- :string
63
+ :pointer
64
64
 
65
65
  attach_function \
66
66
  :fai_fetchqual64,
67
67
  [Faidx, :string, :pointer],
68
- :string
68
+ :pointer
69
69
 
70
70
  # Fetch the number of sequences
71
71
  attach_function \
@@ -77,25 +77,25 @@ module HTS
77
77
  attach_function \
78
78
  :faidx_fetch_seq,
79
79
  [Faidx, :string, :int, :int, :pointer],
80
- :string
80
+ :pointer
81
81
 
82
82
  # Fetch the sequence in a region
83
83
  attach_function \
84
84
  :faidx_fetch_seq64,
85
85
  [Faidx, :string, :int64, :int64, :pointer],
86
- :string
86
+ :pointer
87
87
 
88
88
  # Fetch the quality string in a region for FASTQ files
89
89
  attach_function \
90
90
  :faidx_fetch_qual,
91
91
  [Faidx, :string, :int, :int, :pointer],
92
- :string
92
+ :pointer
93
93
 
94
94
  # Fetch the quality string in a region for FASTQ files
95
95
  attach_function \
96
96
  :faidx_fetch_qual64,
97
97
  [Faidx, :string, :int64, :int64, :pointer],
98
- :string
98
+ :pointer
99
99
 
100
100
  # Query if sequence is present
101
101
  attach_function \
@@ -121,6 +121,11 @@ module HTS
121
121
  [Faidx, :string],
122
122
  :int
123
123
 
124
+ attach_function \
125
+ :faidx_seq_len64,
126
+ [Faidx, :string],
127
+ :int64
128
+
124
129
  # Parses a region string.
125
130
  attach_function \
126
131
  :fai_parse_region,
@@ -46,10 +46,10 @@ module HTS
46
46
 
47
47
  # Report the current stream offset
48
48
  def self.htell(fp)
49
- # TODO: This is a hack. Is this OK?
50
- bg = FFI::Pointer.new(:int, fp.pointer.address + fp.offset_of(:begin)).read_int
51
- bf = FFI::Pointer.new(:int, fp.pointer.address + fp.offset_of(:buffer)).read_int
52
- fp[:offset] + (bg - bf)
49
+ base_ptr = fp.pointer
50
+ begin_ptr = base_ptr.get_pointer(fp.offset_of(:begin))
51
+ buffer_ptr = base_ptr.get_pointer(fp.offset_of(:buffer))
52
+ fp[:offset] + (begin_ptr.address - buffer_ptr.address)
53
53
  end
54
54
 
55
55
  # Read from the stream until the delimiter, up to a maximum length
@@ -467,5 +467,11 @@ module HTS
467
467
  :hts_md5_destroy,
468
468
  [:pointer],
469
469
  :void
470
+
471
+ # Computes CRC32 for a buffer with an initial crc value
472
+ attach_function \
473
+ :hts_crc32,
474
+ %i[uint32 pointer size_t],
475
+ :uint32
470
476
  end
471
477
  end
@@ -382,11 +382,15 @@ module HTS
382
382
 
383
383
  # Get the next read from a SAM/BAM/CRAM iterator
384
384
  def self.sam_itr_next(htsfp, itr, r)
385
- # FIXME: check if htsfp is compressed BGZF
385
+ raise("#{htsfp[:fn] || 'File'} not BGZF compressed") unless htsfp[:is_bgzf] == 1 || htsfp[:is_cram] == 1
386
386
  raise("Null iterator") if itr.null?
387
387
 
388
- # FIXME: check multi
389
- hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
388
+ if itr[:multi] == 1
389
+ hts_itr_multi_next(htsfp, itr, r)
390
+ else
391
+ bgzf = htsfp[:is_bgzf] == 1 ? htsfp[:fp][:bgzf] : FFI::Pointer::NULL
392
+ hts_itr_next(bgzf, itr, r, htsfp)
393
+ end
390
394
  end
391
395
 
392
396
  attach_function \
@@ -508,7 +512,7 @@ module HTS
508
512
  # Append tag data to a bam record
509
513
  attach_function \
510
514
  :bam_aux_append,
511
- [Bam1, :string, :string, :int, :pointer],
515
+ [Bam1, :string, :char, :int, :pointer],
512
516
  :int
513
517
 
514
518
  # Delete tag data from a bam record
@@ -738,6 +742,18 @@ module HTS
738
742
  :bam_mods_recorded,
739
743
  [HtsBaseModState, :pointer],
740
744
  :pointer
745
+
746
+ # Sets the header to the file
747
+ attach_function \
748
+ :sam_hdr_set,
749
+ [HtsFile, SamHdr, :int],
750
+ :int
751
+
752
+ # Get the header from the file pointer
753
+ attach_function \
754
+ :sam_hdr_get,
755
+ [HtsFile],
756
+ SamHdr
741
757
  end
742
758
  end
743
759
 
@@ -270,12 +270,12 @@ module HTS
270
270
  attach_function \
271
271
  :bcf_hdr_get_hrec,
272
272
  [BcfHdr, :int, :string, :string, :string],
273
- BcfHrec.by_ref
273
+ :pointer
274
274
 
275
275
  # Duplicate a header record
276
276
  attach_function \
277
277
  :bcf_hrec_dup,
278
- [BcfHrec],
278
+ [:pointer],
279
279
  BcfHrec.by_ref
280
280
 
281
281
  # Add a new header record key
@@ -304,7 +304,7 @@ module HTS
304
304
  # Free up a header record and associated structures
305
305
  attach_function \
306
306
  :bcf_hrec_destroy,
307
- [BcfHrec],
307
+ [:pointer],
308
308
  :void
309
309
 
310
310
  # Individual record querying and manipulation routines
@@ -539,15 +539,18 @@ module HTS
539
539
  [HtsFile],
540
540
  :int
541
541
 
542
- attach_function \
542
+ attach_variable \
543
543
  :bcf_float_vector_end,
544
- [],
545
544
  :uint32
546
545
 
547
- attach_function \
546
+ attach_variable \
548
547
  :bcf_float_missing,
549
- [],
550
548
  :uint32
549
+
550
+ attach_function \
551
+ :bcf_format_gt_v2,
552
+ [BcfHdr, BcfFmt, :int, KString],
553
+ :int
551
554
  end
552
555
  end
553
556
 
@@ -97,6 +97,11 @@ module HTS
97
97
  bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
98
98
  end
99
99
 
100
+ # Function for updating INFO fields (int64; VCF only)
101
+ def bcf_update_info_int64(hdr, line, key, values, n)
102
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG)
103
+ end
104
+
100
105
  # Function for updating INFO fields
101
106
  def bcf_update_info_float(hdr, line, key, values, n)
102
107
  bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
@@ -150,9 +155,19 @@ module HTS
150
155
  0
151
156
  end
152
157
 
158
+ # Macro for setting genotypes correctly
159
+ def bcf_gt_vector_end
160
+ bcf_int32_vector_end
161
+ end
162
+
153
163
  # Macro for setting genotypes correctly
154
164
  def bcf_gt_is_missing(val)
155
- (val >> 1 ? 0 : 1)
165
+ ((val >> 1) == 0 ? 1 : 0)
166
+ end
167
+
168
+ # Macro for setting genotypes correctly
169
+ def bcf_gt_is_vector_end(val)
170
+ val == bcf_gt_vector_end ? 1 : 0
156
171
  end
157
172
 
158
173
  # Macro for setting genotypes correctly
@@ -175,6 +190,11 @@ module HTS
175
190
  bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
176
191
  end
177
192
 
193
+ # Get INFO values (int64; VCF only)
194
+ def bcf_get_info_int64(hdr, line, tag, dst, ndst)
195
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_LONG)
196
+ end
197
+
178
198
  # Get INFO values
179
199
  def bcf_get_info_float(hdr, line, tag, dst, ndst)
180
200
  bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
@@ -265,7 +285,16 @@ module HTS
265
285
  )[:val][:info][type] & 0xf
266
286
  end
267
287
 
268
- # def bcf_hdr_idinfo_exists
288
+ def bcf_hdr_idinfo_exists(hdr, type, int_id)
289
+ return false if int_id.negative? || int_id >= hdr[:n][LibHTS::BCF_DT_ID]
290
+
291
+ pair = LibHTS::BcfIdpair.new(
292
+ hdr[:id][LibHTS::BCF_DT_ID].to_ptr +
293
+ LibHTS::BcfIdpair.size * int_id # offset
294
+ )
295
+
296
+ !pair[:val].null? && bcf_hdr_id2coltype(hdr, type, int_id) != 0xf
297
+ end
269
298
 
270
299
  # def bcf_hdr_id2hrec
271
300
 
data/lib/hts/tabix.rb CHANGED
@@ -89,8 +89,11 @@ module HTS
89
89
  def seqnames
90
90
  check_closed
91
91
  nseq = FFI::MemoryPointer.new(:int)
92
- LibHTS.tbx_seqnames(@idx, nseq).then do |pts|
92
+ pts = LibHTS.tbx_seqnames(@idx, nseq)
93
+ begin
93
94
  pts.read_array_of_pointer(nseq.read_int).map(&:read_string)
95
+ ensure
96
+ LibHTS.hts_free(pts) unless pts.null?
94
97
  end
95
98
  end
96
99
 
@@ -108,9 +111,7 @@ module HTS
108
111
  def close
109
112
  return if closed?
110
113
 
111
- # @idx is an internal index (LibHTS::Tbx, a ManagedStruct).
112
- # Do not call tbx_destroy here; the FFI finalizer will
113
- # release the underlying C struct when @idx becomes unreachable.
114
+ @idx.close if @idx && !@idx.null?
114
115
  @idx = nil
115
116
  super
116
117
  end
@@ -144,8 +145,12 @@ module HTS
144
145
  def query_yield(qiter)
145
146
  r = LibHTS::KString.new
146
147
  begin
147
- yield r[:s].split("\t") while LibHTS.tbx_itr_next(@hts_file, @idx, qiter, r) > 0
148
+ while (slen = LibHTS.tbx_itr_next(@hts_file, @idx, qiter, r)) >= 0
149
+ yield r.read_string_copy.split("\t")
150
+ end
151
+ raise if slen < -1
148
152
  ensure
153
+ r.free_buffer
149
154
  LibHTS.hts_itr_destroy(qiter)
150
155
  end
151
156
  end
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = "0.3.2"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: htslib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-11-27 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: ffi
@@ -71,13 +71,13 @@ files:
71
71
  - lib/hts/bam/pileup.rb
72
72
  - lib/hts/bam/record.rb
73
73
  - lib/hts/bcf.rb
74
+ - lib/hts/bcf/errors.rb
74
75
  - lib/hts/bcf/format.rb
75
76
  - lib/hts/bcf/header.rb
76
77
  - lib/hts/bcf/header_record.rb
77
78
  - lib/hts/bcf/info.rb
78
79
  - lib/hts/bcf/record.rb
79
80
  - lib/hts/faidx.rb
80
- - lib/hts/faidx/sequence.rb
81
81
  - lib/hts/ffi_ext/README.md
82
82
  - lib/hts/ffi_ext/pointer.rb
83
83
  - lib/hts/ffi_ext/struct.rb
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
119
  - !ruby/object:Gem::Version
120
120
  version: '0'
121
121
  requirements: []
122
- rubygems_version: 3.6.2
122
+ rubygems_version: 4.0.10
123
123
  specification_version: 4
124
124
  summary: HTSlib bindings for Ruby
125
125
  test_files: []
@@ -1,62 +0,0 @@
1
- module HTS
2
- class Faidx
3
- class Sequence
4
- attr_reader :name, :faidx
5
-
6
- def initialize(faidx, name)
7
- raise ArgumentError, "Sequence not found: #{name}" unless faidx.has_key?(name)
8
-
9
- @faidx = faidx
10
- @name = name
11
- end
12
-
13
- def length
14
- faidx.seq_len(name)
15
- end
16
- alias size length
17
-
18
- def seq(start = nil, stop = nil)
19
- faidx.seq(name, start, stop)
20
- end
21
-
22
- def qual(start = nil, stop = nil)
23
- faidx.qual(name, start, stop)
24
- end
25
-
26
- def [](arg)
27
- case arg
28
- when Integer
29
- if arg >= 0
30
- start = arg
31
- stop = arg
32
- else
33
- start = length + arg
34
- stop = length + arg
35
- end
36
- when Range
37
- arg = Range.new(arg.begin, arg.end + length, arg.exclude_end?) if arg.end&.<(0)
38
- arg = Range.new(arg.begin + length, arg.end, arg.exclude_end?) if arg.begin&.<(0)
39
- if arg.begin.nil?
40
- if arg.end.nil?
41
- start = nil
42
- stop = nil
43
- else
44
- start = 0
45
- stop = arg.exclude_end? ? arg.end - 1 : arg.end
46
- end
47
- elsif arg.end.nil?
48
- # always include the first base
49
- start = arg.begin
50
- stop = length - 1
51
- else
52
- start = arg.begin
53
- stop = arg.exclude_end? ? arg.end - 1 : arg.end
54
- end
55
- else
56
- raise ArgumentError
57
- end
58
- seq(start, stop)
59
- end
60
- end
61
- end
62
- end