htslib 0.0.0 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
+ # constants
6
+ BAM_CMATCH = 0
7
+ BAM_CINS = 1
8
+ BAM_CDEL = 2
9
+ BAM_CREF_SKIP = 3
10
+ BAM_CSOFT_CLIP = 4
11
+ BAM_CHARD_CLIP = 5
12
+ BAM_CPAD = 6
13
+ BAM_CEQUAL = 7
14
+ BAM_CDIFF = 8
15
+ BAM_CBACK = 9
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB"
18
+ BAM_CIGAR_SHIFT = 4
19
+ BAM_CIGAR_MASK = 0xf
20
+ BAM_CIGAR_TYPE = 0x3C1A7
21
+
5
22
  # macros
6
23
  class << self
7
24
  def bam_cigar_op(c)
@@ -12,8 +29,8 @@ module HTS
12
29
  c >> BAM_CIGAR_SHIFT
13
30
  end
14
31
 
15
- def bam_cigar_opchar(c)
16
- _BAM_CIGAR_STR_PADDED[bam_cigar_op(c)]
32
+ def bam_cigar_opchr(c)
33
+ ("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
17
34
  end
18
35
 
19
36
  def bam_cigar_gen(l, o)
@@ -25,6 +42,19 @@ module HTS
25
42
  end
26
43
  end
27
44
 
45
+ BAM_FPAIRED = 1
46
+ BAM_FPROPER_PAIR = 2
47
+ BAM_FUNMAP = 4
48
+ BAM_FMUNMAP = 8
49
+ BAM_FREVERSE = 16
50
+ BAM_FMREVERSE = 32
51
+ BAM_FREAD1 = 64
52
+ BAM_FREAD2 = 128
53
+ BAM_FSECONDARY = 256
54
+ BAM_FQCFAIL = 512
55
+ BAM_FDUP = 1024
56
+ BAM_FSUPPLEMENTARY = 2048
57
+
28
58
  # macros
29
59
  # function-like macros
30
60
  class << self
@@ -60,7 +90,9 @@ module HTS
60
90
  b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
61
91
  end
62
92
 
63
- # def bam_seqi(s, i)
93
+ def bam_seqi(s, i)
94
+ s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
95
+ end
64
96
 
65
97
  # def bam_set_seqi(s, i, b)
66
98
  end
@@ -146,13 +178,13 @@ module HTS
146
178
  # Returns a complete line of formatted text for a given type and ID.
147
179
  attach_function \
148
180
  :sam_hdr_find_line_id,
149
- [SamHdr, :string, :string, :string, Kstring],
181
+ [SamHdr, :string, :string, :string, KString],
150
182
  :int
151
183
 
152
184
  # Returns a complete line of formatted text for a given type and index.
153
185
  attach_function \
154
186
  :sam_hdr_find_line_pos,
155
- [SamHdr, :string, :int, Kstring],
187
+ [SamHdr, :string, :int, KString],
156
188
  :int
157
189
 
158
190
  # Remove a line with given type / id from a header
@@ -206,13 +238,13 @@ module HTS
206
238
  # Return the value associated with a key for a header line identified by ID_key:ID_val
207
239
  attach_function \
208
240
  :sam_hdr_find_tag_id,
209
- [SamHdr, :string, :string, :string, :string, Kstring],
241
+ [SamHdr, :string, :string, :string, :string, KString],
210
242
  :int
211
243
 
212
244
  # Return the value associated with a key for a header line identified by position
213
245
  attach_function \
214
246
  :sam_hdr_find_tag_pos,
215
- [SamHdr, :string, :int, :string, Kstring],
247
+ [SamHdr, :string, :int, :string, KString],
216
248
  :int
217
249
 
218
250
  # Remove the key from the line identified by type, ID_key and ID_value.
@@ -405,6 +437,14 @@ module HTS
405
437
  [HtsIdx, SamHdr, :pointer, :uint],
406
438
  HtsItr.by_ref
407
439
 
440
# Get the next read from a SAM/BAM/CRAM iterator.
#
# Ruby port of htslib's inline `sam_itr_next` function.
#
# @param htsfp the opened file handle; its `[:fp][:bgzf]` member is handed to `hts_itr_next`
# @param itr the region iterator; may be nil or a NULL pointer
# @param r the record object passed through to `hts_itr_next`
# @return -2 when no usable iterator was given, otherwise whatever `hts_itr_next` returns
def self.sam_itr_next(htsfp, itr, r)
  # FIXME: check if htsfp is compressed BGZF
  if itr.nil? || itr.null?
    hts_log_error("Null iterator")
    # Stop here, as the C implementation does, instead of passing a NULL
    # iterator on to hts_itr_next.
    return -2
  end
  # FIXME: check multi
  hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
end
447
+
408
448
  attach_function \
409
449
  :sam_parse_region,
410
450
  [SamHdr, :string, :pointer, :pointer, :pointer, :int],
@@ -435,12 +475,12 @@ module HTS
435
475
 
436
476
  attach_function \
437
477
  :sam_parse1,
438
- [Kstring, SamHdr, Bam1],
478
+ [KString, SamHdr, Bam1],
439
479
  :int
440
480
 
441
481
  attach_function \
442
482
  :sam_format1,
443
- [SamHdr, Bam1, Kstring],
483
+ [SamHdr, Bam1, KString],
444
484
  :int
445
485
 
446
486
  # Read a record from a file
@@ -543,52 +583,52 @@ module HTS
543
583
  attach_function \
544
584
  :bam_plp_init,
545
585
  %i[bam_plp_auto_f pointer],
546
- BamPlp
586
+ :bam_plp
547
587
 
548
588
  attach_function \
549
589
  :bam_plp_destroy,
550
- [BamPlp],
590
+ [:bam_plp],
551
591
  :void
552
592
 
553
593
  attach_function \
554
594
  :bam_plp_push,
555
- [BamPlp, Bam1],
595
+ [:bam_plp, Bam1],
556
596
  :int
557
597
 
558
598
  attach_function \
559
599
  :bam_plp_next,
560
- [BamPlp, :pointer, :pointer, :pointer],
561
- :pointer
600
+ %i[bam_plp pointer pointer pointer],
601
+ BamPileup1.by_ref
562
602
 
563
603
  attach_function \
564
604
  :bam_plp_auto,
565
- [BamPlp, :pointer, :pointer, :pointer],
566
- :pointer
605
+ %i[bam_plp pointer pointer pointer],
606
+ BamPileup1.by_ref
567
607
 
568
608
  attach_function \
569
609
  :bam_plp64_next,
570
- [BamPlp, :pointer, :pointer, :pointer],
571
- :pointer
610
+ %i[bam_plp pointer pointer pointer],
611
+ BamPileup1.by_ref
572
612
 
573
613
  attach_function \
574
614
  :bam_plp64_auto,
575
- [BamPlp, :pointer, :pointer, :pointer],
576
- :pointer
615
+ %i[bam_plp pointer pointer pointer],
616
+ BamPileup1.by_ref
577
617
 
578
618
  attach_function \
579
619
  :bam_plp_set_maxcnt,
580
- [BamPlp, :int],
620
+ %i[bam_plp int],
581
621
  :void
582
622
 
583
623
  attach_function \
584
624
  :bam_plp_reset,
585
- [BamPlp],
625
+ [:bam_plp],
586
626
  :void
587
627
 
588
628
  # Gets the padded insertion sequence at a pileup position.
589
629
  attach_function \
590
630
  :bam_plp_insertion,
591
- [:pointer, Kstring, :pointer],
631
+ [BamPileup1, KString, :pointer],
592
632
  :int
593
633
 
594
634
  # sets a callback to initialise any per-pileup1_t fields.
@@ -602,36 +642,36 @@ module HTS
602
642
  attach_function \
603
643
  :bam_mplp_init,
604
644
  %i[int bam_plp_auto_f pointer],
605
- BamMplp.by_ref
645
+ :bam_mplp
606
646
 
607
647
  attach_function \
608
648
  :bam_mplp_init_overlaps,
609
- [BamMplp],
649
+ [:bam_mplp],
610
650
  :int
611
651
 
612
652
  attach_function \
613
653
  :bam_mplp_destroy,
614
- [BamMplp],
654
+ [:bam_mplp],
615
655
  :void
616
656
 
617
657
  attach_function \
618
658
  :bam_mplp_set_maxcnt,
619
- [BamMplp, :int],
659
+ %i[bam_mplp int],
620
660
  :void
621
661
 
622
662
  attach_function \
623
663
  :bam_mplp_auto,
624
- [BamMplp, :pointer, :pointer, :pointer, :pointer],
664
+ %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
625
665
  :int
626
666
 
627
667
  attach_function \
628
668
  :bam_mplp64_auto,
629
- [BamMplp, :pointer, :pointer, :pointer, :pointer],
669
+ %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
630
670
  :int
631
671
 
632
672
  attach_function \
633
673
  :bam_mplp_reset,
634
- [BamMplp],
674
+ [:bam_mplp],
635
675
  :void
636
676
 
637
677
  # bam_mplp_constructor
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
5
  attach_function \
6
6
  :tbx_name2id,
7
7
  [Tbx, :string],
@@ -1,7 +1,183 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
+ # constants
6
+ BCF_HL_FLT = 0 # header line
7
+ BCF_HL_INFO = 1
8
+ BCF_HL_FMT = 2
9
+ BCF_HL_CTG = 3
10
+ BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
11
+ BCF_HL_GEN = 5 # generic header line
12
+ BCF_HT_FLAG = 0 # header type
13
+
14
+ BCF_HT_INT = 1
15
+ BCF_HT_REAL = 2
16
+ BCF_HT_STR = 3
17
+ BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
18
+
19
+ BCF_VL_FIXED = 0 # variable length
20
+ BCF_VL_VAR = 1
21
+ BCF_VL_A = 2
22
+ BCF_VL_G = 3
23
+ BCF_VL_R = 4
24
+
25
+ BCF_DT_ID = 0 # dictionary type
26
+ BCF_DT_CTG = 1
27
+ BCF_DT_SAMPLE = 2
28
+
29
+ BCF_BT_NULL = 0
30
+ BCF_BT_INT8 = 1
31
+ BCF_BT_INT16 = 2
32
+ BCF_BT_INT32 = 3
33
+ BCF_BT_INT64 = 4 # Unofficial, for internal use only.
34
+ BCF_BT_FLOAT = 5
35
+ BCF_BT_CHAR = 7
36
+
37
+ VCF_REF = 0
38
+ VCF_SNP = 1
39
+ VCF_MNP = 2
40
+ VCF_INDEL = 4
41
+ VCF_OTHER = 8
42
+ VCF_BND = 16 # breakend
43
+ VCF_OVERLAP = 32 # overlapping deletion, ALT=*
44
+
45
+ BCF1_DIRTY_ID = 1
46
+ BCF1_DIRTY_ALS = 2
47
+ BCF1_DIRTY_FLT = 4
48
+ BCF1_DIRTY_INF = 8
49
+
50
+ BCF_ERR_CTG_UNDEF = 1
51
+ BCF_ERR_TAG_UNDEF = 2
52
+ BCF_ERR_NCOLS = 4
53
+ BCF_ERR_LIMITS = 8
54
+ BCF_ERR_CHAR = 16
55
+ BCF_ERR_CTG_INVALID = 32
56
+ BCF_ERR_TAG_INVALID = 64
57
+
58
+ # macros
59
+ class << self
60
+ alias bcf_open hts_open
61
+ alias vcf_open hts_open
62
+ alias bcf_close hts_close
63
+ alias vcf_close hts_close
64
+
65
+ def bcf_hdr_nsamples(hdr)
66
+ hdr[:n][BCF_DT_SAMPLE]
67
+ end
68
+
69
+ def bcf_update_info_int32(hdr, line, key, values, n)
70
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
71
+ end
72
+
73
+ def bcf_update_info_float(hdr, line, key, values, n)
74
+ bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
75
+ end
76
+
77
+ def bcf_update_info_flag(hdr, line, key, string, n)
78
+ bcf_update_info(hdr, line, key, string, n, BCF_HT_FLAG)
79
+ end
80
+
81
+ def bcf_update_info_string(hdr, line, key, string)
82
+ bcf_update_info(hdr, line, key, string, 1, BCF_HT_STR)
83
+ end
84
+
85
+ def bcf_update_format_int32(hdr, line, key, values, n)
86
+ bcf_update_format(hdr, line, key, values, n,
87
+ BCF_HT_INT)
88
+ end
89
+
90
+ def bcf_update_format_float(hdr, line, key, values, n)
91
+ bcf_update_format(hdr, line, key, values, n,
92
+ BCF_HT_REAL)
93
+ end
94
+
95
+ def bcf_update_format_char(hdr, line, key, values, n)
96
+ bcf_update_format(hdr, line, key, values, n,
97
+ BCF_HT_STR)
98
+ end
99
+
100
+ def bcf_update_genotypes(hdr, line, gts, n)
101
+ bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT)
102
+ end
103
+
104
+ def bcf_gt_phased(idx)
105
+ ((idx + 1) << 1 | 1)
106
+ end
107
+
108
+ def bcf_gt_unphased(idx)
109
+ ((idx + 1) << 1)
110
+ end
111
+
112
+ def bcf_gt_missing
113
+ 0
114
+ end
115
+
116
# Port of htslib's `bcf_gt_is_missing` macro.
#
# The allele index is stored in the bits above the phase bit, so a value
# whose upper bits are all zero encodes a missing allele.
#
# @param val [Integer] an encoded genotype value
# @return [Integer] 1 when the allele is missing, 0 otherwise
def bcf_gt_is_missing(val)
  # An explicit zero test is required: every Integer, including 0, is truthy
  # in Ruby, so the C-style `(val >> 1) ? 0 : 1` would always yield 0.
  (val >> 1).zero? ? 1 : 0
end
119
+
120
+ def bcf_gt_is_phased(idx)
121
+ ((idx) & 1)
122
+ end
123
+
124
+ def bcf_gt_allele(val)
125
+ (((val) >> 1) - 1)
126
+ end
127
+
128
+ def bcf_alleles2gt(a, b)
129
+ ((a) > (b) ? (a * (a + 1) / 2 + b) : (b * (b + 1) / 2 + a))
130
+ end
131
+
132
+ def bcf_get_info_int32(hdr, line, tag, dst, ndst)
133
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
134
+ end
135
+
136
+ def bcf_get_info_float(hdr, line, tag, dst, ndst)
137
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
138
+ end
139
+
140
+ def bcf_get_info_string(hdr, line, tag, dst, ndst)
141
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
142
+ end
143
+
144
+ def bcf_get_info_flag(hdr, line, tag, dst, ndst)
145
+ bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG)
146
+ end
147
+
148
+ def bcf_get_format_int32(hdr, line, tag, dst, ndst)
149
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
150
+ end
151
+
152
+ def bcf_get_format_float(hdr, line, tag, dst, ndst)
153
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
154
+ end
155
+
156
+ def bcf_get_format_char(hdr, line, tag, dst, ndst)
157
+ bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
158
+ end
159
+
160
+ def bcf_get_genotypes(hdr, line, dst, ndst)
161
+ bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT)
162
+ end
163
+
164
+ def bcf_hdr_int2id(hdr, type, int_id)
165
+ LibHTS::BcfIdpair.new(
166
+ hdr[:id][type].to_ptr +
167
+ LibHTS::BcfIdpair.size * int_id # offsets
168
+ )[:key]
169
+ end
170
+ end
171
+
172
+ # constants
173
+ BCF_UN_STR = 1 # up to ALT inclusive
174
+ BCF_UN_FLT = 2 # up to FILTER
175
+ BCF_UN_INFO = 4 # up to INFO
176
+ BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
177
+ BCF_UN_FMT = 8 # unpack format and each sample
178
+ BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
179
+ BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
180
+
5
181
  attach_function \
6
182
  :bcf_hdr_init,
7
183
  [:string],
@@ -14,7 +190,7 @@ module HTS
14
190
 
15
191
  attach_function \
16
192
  :bcf_init,
17
- [:void],
193
+ [],
18
194
  Bcf1.by_ref
19
195
 
20
196
  attach_function \
@@ -54,7 +230,7 @@ module HTS
54
230
 
55
231
  attach_function \
56
232
  :vcf_parse,
57
- [Kstring, BcfHdr, Bcf1],
233
+ [KString, BcfHdr, Bcf1],
58
234
  :int
59
235
 
60
236
  attach_function \
@@ -64,7 +240,7 @@ module HTS
64
240
 
65
241
  attach_function \
66
242
  :vcf_format,
67
- [BcfHdr, Bcf1, Kstring],
243
+ [BcfHdr, Bcf1, KString],
68
244
  :int
69
245
 
70
246
  attach_function \
@@ -119,7 +295,7 @@ module HTS
119
295
 
120
296
  attach_function \
121
297
  :vcf_write_line,
122
- [HtsFile, Kstring],
298
+ [HtsFile, KString],
123
299
  :int
124
300
 
125
301
  attach_function \
@@ -144,7 +320,7 @@ module HTS
144
320
 
145
321
  attach_function \
146
322
  :bcf_hdr_format,
147
- [BcfHdr, :int, Kstring],
323
+ [BcfHdr, :int, KString],
148
324
  :int
149
325
 
150
326
  attach_function \
@@ -204,7 +380,7 @@ module HTS
204
380
 
205
381
  attach_function \
206
382
  :bcf_hrec_format,
207
- [BcfHrec, Kstring],
383
+ [BcfHrec, KString],
208
384
  :int
209
385
 
210
386
  attach_function \
@@ -369,27 +545,27 @@ module HTS
369
545
 
370
546
  attach_function \
371
547
  :bcf_fmt_array,
372
- [Kstring, :int, :int, :pointer],
548
+ [KString, :int, :int, :pointer],
373
549
  :int
374
550
 
375
551
  attach_function \
376
552
  :bcf_fmt_sized_array,
377
- [Kstring, :pointer],
553
+ [KString, :pointer],
378
554
  :uint8_t
379
555
 
380
556
  attach_function \
381
557
  :bcf_enc_vchar,
382
- [Kstring, :int, :string],
558
+ [KString, :int, :string],
383
559
  :int
384
560
 
385
561
  attach_function \
386
562
  :bcf_enc_vint,
387
- [Kstring, :int, :pointer, :int],
563
+ [KString, :int, :pointer, :int],
388
564
  :int
389
565
 
390
566
  attach_function \
391
567
  :bcf_enc_vfloat,
392
- [Kstring, :int, :pointer],
568
+ [KString, :int, :pointer],
393
569
  :int
394
570
 
395
571
  attach_function \
data/lib/hts/libhts.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ffi_ext/struct"
4
+
5
+ module HTS
6
+ module LibHTS
7
+ extend FFI::Library
8
+
9
+ begin
10
+ ffi_lib HTS.lib_path
11
+ rescue LoadError => e
12
+ raise LoadError, "#{e}\nCould not find #{HTS.lib_path}"
13
+ end
14
+
15
+ def self.attach_function(*)
16
+ super
17
+ rescue FFI::NotFoundError => e
18
+ warn e.message
19
+ end
20
+ end
21
+ end
22
+
23
+ require_relative "libhts/constants"
24
+
25
+ # Roughly alphabetical order (note: sam is loaded before kfunc).
26
+ require_relative "libhts/bgzf"
27
+ require_relative "libhts/faidx"
28
+ require_relative "libhts/hfile"
29
+ require_relative "libhts/hts"
30
+ require_relative "libhts/sam"
31
+ require_relative "libhts/kfunc"
32
+ require_relative "libhts/tbx"
33
+ require_relative "libhts/vcf"
data/lib/hts/tabix.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "utils/open_method"
7
+
8
+ module HTS
9
+ class Tabix
10
+ extend Utils::OpenMethod
11
+
12
+ def initialize; end
13
+
14
+ def build; end
15
+
16
+ def sequences; end
17
+
18
+ # def __call__\
19
+ end
20
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  module Utils
    # Adds a File.open-style `open` class method to any class that extends
    # this module. The class must accept `path` in `new`, and its instances
    # must respond to `close` when the block form is used.
    module OpenMethod
      # Instantiate the receiver for +path+.
      #
      # Without a block the new object is returned and the caller is
      # responsible for closing it. With a block the object is yielded and
      # then closed, even when the block raises.
      #
      # @param path the argument forwarded to `new`
      # @yield [object] the freshly created instance
      # @return the new instance (no block) or the block's value (block form)
      def open(path)
        object = new(path)
        return object unless block_given?

        begin
          yield(object)
        ensure
          # Always release the handle so an exception inside the block does
          # not leak it.
          object.close
        end
      end
    end
  end
end
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = '0.0.0'
4
+ VERSION = "0.0.4"
5
5
  end
data/lib/htslib.rb CHANGED
@@ -1,18 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'ffi'
3
+ require "ffi"
4
4
 
5
- require 'hts/version'
5
+ require "hts/version"
6
6
 
7
7
  module HTS
8
8
  class Error < StandardError; end
9
9
 
10
10
  class << self
11
- attr_accessor :ffi_lib
11
+ attr_accessor :lib_path
12
+
13
# Locate the htslib shared library on disk.
#
# Lookup order:
# 1. `$HTSLIBDIR/<name>` when the HTSLIBDIR environment variable is set,
#    otherwise `../vendor/<name>` relative to this file's directory.
# 2. The `libdir` reported by pkg-config for the `htslib` package.
#
# @param name [String, nil] file name of the shared library; defaults to
#   "libhts." followed by the platform's library suffix
# @return [String, nil] absolute path of the library, or nil (after a
#   warning on stderr) when it cannot be found
def search_htslib(name = nil)
  name ||= "libhts.#{FFI::Platform::LIBSUFFIX}"
  lib_path = if ENV["HTSLIBDIR"]
               File.expand_path(name, ENV["HTSLIBDIR"])
             else
               File.expand_path("../vendor/#{name}", __dir__)
             end
  return lib_path if File.exist?(lib_path)

  begin
    require "pkg-config"
    lib_dir = PKGConfig.variable("htslib", "libdir")
    lib_path = File.expand_path(name, lib_dir)
    return lib_path if File.exist?(lib_path)
  rescue LoadError
    # Must come before the PackageConfig clause: when the gem itself is
    # missing, the PackageConfig constant is undefined and evaluating that
    # clause would raise NameError instead of falling through.
    warn "pkg-config gem could not be loaded; skipping pkg-config lookup."
  rescue PackageConfig::NotFoundError
    warn "htslib.pc was not found in the pkg-config search path."
  end

  warn "htslib shared library '#{name}' not found."
  nil
end
12
33
  end
13
- self.ffi_lib = File.expand_path("libhts.#{FFI::Platform::LIBSUFFIX}", ENV['HTSLIBDIR'])
14
- autoload :FFI, 'hts/ffi'
34
+
35
+ self.lib_path = search_htslib
36
+
37
+ # You can change the path of the shared library with `HTS.lib_path=`
38
+ # before calling the LibHTS module.
39
+ autoload :LibHTS, "hts/libhts"
15
40
  end
16
41
 
17
- # alias
18
- HTSlib = HTS
42
+ require_relative "hts/bam"
43
+ require_relative "hts/faidx"
44
+ require_relative "hts/tabix"
45
+ require_relative "hts/bcf"